View Javadoc

1   package uk.ac.ebi.intenz.webapp.controller;
2   
3   import java.io.IOException;
4   import java.sql.PreparedStatement;
5   import java.sql.ResultSet;
6   import java.sql.SQLException;
7   import java.util.*;
8   import java.util.regex.Matcher;
9   import java.util.regex.Pattern;
10  import javax.servlet.ServletException;
11  
12  import org.apache.log4j.Logger;
13  import uk.ac.ebi.biobabel.util.WebUtil;
14  import uk.ac.ebi.intenz.domain.enzyme.EnzymeCommissionNumber;
15  import uk.ac.ebi.intenz.domain.exceptions.EcException;
16  import uk.ac.ebi.intenz.webapp.exceptions.QueryException;
17  import uk.ac.ebi.intenz.webapp.utilities.IntEnzMessenger;
18  import uk.ac.ebi.xchars.SpecialCharacters;
19  import uk.ac.ebi.xchars.domain.EncodingType;
20  import uk.ac.ebi.xchars.exceptions.InvalidUTF8OctetSequenceException;
21  
22  /**
23   * This class processes all full text queries.
24   * <p/>
25   * All queries are processed using the UTF-8 encoded (octet sequences) representation sent by browsers.
26   * Rather than using <code>request.getParameter(...)</code> this class decodes the query string manually, since
27   * <code>request.getParameter(...)</code> does not decode the octet sequences correctly, if special characters encoded
28   * as two or more octets are transmitted.<br/>
29   * See the <code>XChars</code> library documentation for more information regarding UTF-8 octet sequence decoding.
30   * <p/>
31   * <p/>
32   * TODO: Implement a query parser.
33   *
34   * @author Michael Darsow
35   * @version 2.0 - 13-July-2004
36   */
37  public class SearchCommand extends DatabaseCommand {
38  
39      public static final Logger LOGGER = Logger.getLogger(SearchCommand.class);
40  
41    private static final String COLUMNS =
42        "enzyme_id, ec, common_name, status, text, text_order";
43  
44    /**
45     * Returns the SQL statement for full text searching using a text index (ORACLE interMedia Text).
46     *
47     * @return the SQL statement.
48     */
49    private String fulltextQueryStatement() {
50      return "SELECT /*+ FIRST_ROWS */ score(1) score, " + COLUMNS +
51              " FROM enzyme.intenz_text" +
52              " WHERE CONTAINS (text, ?, 1) > 0" +
53              " ORDER BY score(1) DESC";
54    }
55  
56    /**
57     * Processes the full text query.
58     * <p/>
59     * The query string is parsed, decoded, checked and integrated into a SQL statement.
60     * After execution the result is being sent to the browser (if any).
61     * Exceptions regarding this process are caught and the user is informed accordingly.
62     *
63     * @throws ServletException ...
64     * @throws IOException      ...
65     */
66    public void process() throws ServletException, IOException {
67        int groupSize = 10;
68        try {
69            groupSize = Integer.parseInt(config.getPageSize());
70        } catch (NumberFormatException e) {
71            LOGGER.error("Bad page size: " + config.getPageSize(), e);
72        }
73        String query = null;
74      StringBuffer userFriendlyQuery = null;
75      String userFriendlyQueryTF = null;
76  
77      // Decode the UTF-8 octets sent by the client.
78      try {
79        query = decodeQuery(request.getQueryString(), false);
80        userFriendlyQuery = new StringBuffer(typifyQuery(query, QueryType.valueOf(request.getParameter("t"))).trim());
81        userFriendlyQueryTF = new String(query);
82      } catch (InvalidUTF8OctetSequenceException e) {
83        request.setAttribute("query", "");
84        request.setAttribute("message", e.getMessage());
85        forward("/noResult.jsp"); // No result found.
86        return;
87      }
88  
89      // Catch empty queries.
90      if (query == null || query.equals("")) {
91        request.setAttribute("query", "");
92        request.setAttribute("message", "The query string was empty!");
93        forward("/noResult.jsp"); // No result found.
94        return;
95      }
96  
97      // Check query if it is valid and prepare it for the actual search process.
98      StringBuffer checkedQuery = null;
99      try {
100       // Parameter 't' stores the type of the search. This can be one of the following:
101       // ALL words (logical AND), ANY words (logical OR) or EXACT match (phrase search).
102       checkedQuery = new StringBuffer(checkQuery(query, QueryType.valueOf(request.getParameter("t"))).trim());
103 
104       // Check if the user entered words to be excluded.
105       String excludedWords = decodeQuery(request.getQueryString(), true);
106       if (excludedWords != null && !excludedWords.equals("")) {
107         checkedQuery.append(" ");
108         String extendedQuery = extendQuery(excludedWords, true);
109         checkedQuery.append(extendedQuery);
110         userFriendlyQuery.append(" ");
111         userFriendlyQuery.append(new String(extendQuery(excludedWords, false)));
112         request.setAttribute("excludedWords", excludedWords);
113       }
114 
115       // Check if the user chose a field to limit his/her search.
116       String field = request.getParameter("fields");
117       if (field != null && !field.equals("all")) {
118         checkedQuery.append(" ");
119         String withinQuery = addWithinClause(field);
120         checkedQuery.append(withinQuery);
121         userFriendlyQuery.append(" ");
122         userFriendlyQuery.append(new String(withinQuery));
123         request.setAttribute("field", field);
124       }
125     } catch (QueryException e) {
126       request.setAttribute("message", e.getMessage());
127       // Store (HTML-friendly) original query for feedback.
128       request.setAttribute("query", WebUtil.escapeHTMLTag(escapeUTF8(userFriendlyQuery.toString())));
129       request.setAttribute("queryTF", WebUtil.escapeHTMLTag(escapeUTF8(userFriendlyQueryTF)));
130       forward("/search.jsp");
131       return;
132     } catch (InvalidUTF8OctetSequenceException e) {
133       request.setAttribute("query", "");
134       request.setAttribute("message", e.getMessage());
135       forward("/noResult.jsp"); // No result found.
136       return;
137     }
138 
139     // Store (HTML-friendly) original query for feedback.
140     request.setAttribute("query", WebUtil.escapeHTMLTag(escapeUTF8(userFriendlyQuery.toString())));
141     request.setAttribute("queryTF", WebUtil.escapeHTMLTag(escapeUTF8(userFriendlyQueryTF)));
142 
143     PreparedStatement ps = null;
144     List<Result> results = new ArrayList<Result>();
145     Integer maxScore = new Integer(0);
146 
147     try {
148       ps = con.prepareStatement(fulltextQueryStatement());
149       ps.setString(1, checkedQuery.toString());
150 //      ps.setString(2, checkedQuery.toString());
151       ResultSet rs = ps.executeQuery();
152 
153       boolean maxScoreCounted = false;
154       queryResultsLoop: while (rs.next()) {
155         String id = rs.getString("enzyme_id");
156         // Check if we already have the entry within the results:
157         for (int i = 0; i < results.size(); i++){
158             Result previous = results.get(i);
159             if (id.equals(previous.id)){
160                 previous.addText(rs.getString("text"), rs.getInt("text_order"));
161                 previous.addScore(rs.getInt("score"));
162                 continue queryResultsLoop;
163             }
164         }
165 
166         Result res = new Result();
167         res.id = id;
168         res.ec = EnzymeCommissionNumber.valueOf(rs.getString("ec"));
169         String commonName = rs.getString("common_name");
170         if (commonName == null) commonName = "";
171         res.commonName = commonName;
172         res.status = rs.getString("status");
173         if (!maxScoreCounted) {
174             maxScore = new Integer(rs.getString("score"));
175             maxScoreCounted = true;
176         }
177         res.score = rs.getInt("score");
178         res.addText(rs.getString("text"), rs.getInt("text_order"));
179         if (res.ec.toString().equals(query)){
180         	results.add(0, res);
181         } else {
182         	results.add(res);
183         }
184 
185       }
186     } catch (IllegalArgumentException e) {
187       doErrorExceptionHandling(e);
188       return;
189     } catch (EcException e){
190         doErrorExceptionHandling(e);
191         return;
192     } catch (SQLException e) {
193        LOGGER.error("While searching", e);
194       IntEnzMessenger.sendError(this.getClass().toString(),
195               e.getMessage() + " query (checked query): " + query + "("
196                     + checkedQuery + ")",
197               (String) request.getSession().getAttribute("user"));
198       if (e.getMessage().indexOf("DRG-51030") > -1) {
199         request.setAttribute("message", "Your query resulted in too many terms.\nPlease refine your query.");
200         forward("/search.jsp");
201         return;
202       }
203       if (e.getMessage().indexOf("DRG-50901") > -1) {
204         request.setAttribute("message",
205                 "The given query could not be processed.\nPlease check the usage of operators and special characters.");
206         forward("/search.jsp");
207         return;
208       }
209       if (e.getMessage().indexOf("DRG-10837") > -1) {
210         request.setAttribute("message",
211                 "The given section does not exist.\nPlease choose a section from the drop down list below.");
212         forward("/search.jsp");
213         return;
214       }
215       request.setAttribute("message", "The following database error occured:\n" + e.getMessage() +
216               this.databaseErrorMessage);
217       forward("/error.jsp");
218       return;
219 	} finally {
220       try {
221         ps.close();
222       } catch (SQLException e) {
223          doErrorExceptionHandling(e);
224          return;
225       }
226     }
227 
228     if (results.size() == 0) {
229       // Store (HTML-friendly) original query for feedback.
230       request.setAttribute("query", WebUtil.escapeHTMLTag(escapeUTF8(userFriendlyQuery.toString())));
231       request.setAttribute("queryTF", WebUtil.escapeHTMLTag(escapeUTF8(userFriendlyQueryTF)));
232       request.setAttribute("message", "No results found for '" + query + "'");
233       forward("/noResult.jsp"); // No result found.
234       return;
235     }
236 
237     if (results.size() == 1){
238         // Go straight to the only one result:
239         String ec = results.get(0).ec.toString();
240         try {
241             switch (EnzymeCommissionNumber.valueOf(ec).getType()) {
242                 case ENZYME:
243                 case PRELIMINARY:
244                     String id = results.get(0).id;
245                     forward("/query?cmd=SearchID&id=" + id);
246                     return;
247                 default:
248                     forward("/query?cmd=SearchEC&ec=" + ec);
249                     return;
250             }
251         } catch (Exception e) {
252             // we shouldn't get here, that would mean there's something wrong in the DB!
253             request.setAttribute("message", ec + " is not a valid EC number.\nPlease try again.");
254             forward("/search.jsp");
255             return;
256         }
257     }
258 
259     List<Result> group = getGroup(results, 0, groupSize);
260     request.setAttribute("group", group);
261 
262     // Set maximum score.
263     request.getSession().setAttribute("max_score", maxScore);
264 
265     // Set current values (start and end index and result size).
266     request.setAttribute("st", "" + 0);
267     if (results.size() > groupSize)
268       request.setAttribute("end", "" + groupSize);
269     else
270       request.setAttribute("end", "" + results.size());
271     request.setAttribute("size", "" + results.size());
272 
273     // Set start index of the following group.
274     if (groupSize < results.size()) {
275       request.setAttribute("nst", "" + groupSize);
276     }
277 
278     // Set the group size.
279     request.getSession().setAttribute("gs", new Integer(groupSize));
280     request.getSession().setAttribute("qResult", escapeUTF8(userFriendlyQuery.toString()));
281     request.getSession().setAttribute("qResultTF", escapeUTF8(userFriendlyQueryTF));
282     request.getSession().setAttribute("result", results);
283     forward("/result.jsp");
284   }
285 
286    private void doErrorExceptionHandling (Exception e) throws ServletException, IOException {
287        LOGGER.error("Other error while searching", e);
288       IntEnzMessenger.sendError(this.getClass().toString(), e.getMessage(),
289               (String) request.getSession().getAttribute("user"));
290       request.setAttribute("message", "The following database error occured:\n" + e.getMessage() +
291               this.databaseErrorMessage);
292       forward("/error.jsp");
293    }
294 
295    /**
296    * Replaces '+' by a space.
297    * <p/>
298    * The plus character in UTF-8 URL encoding encodes the space character.
299    *
300    * @param queryString The query string to be checked. (The parameter cannot be <code>null</code> or empty, because it has been checked already.)
301    * @return query string with spaces.
302    */
303   private String replacePlus(String queryString) {
304     return queryString.replaceAll("\\+", " ");
305   }
306 
307   /**
308    * Decodes the UTF-8 query, i.e. the value of the <code>q</code> parameter.
309    * <p/>
310    * Does the same as <code>URLDecoder</code> except that the octets are decoded differently
311    * (see <code>XChars</code> library for more info).
312    *
313    * @param queryString    The query string to be decoded.
314    * @param isNotParameter
315    * @return the decoded value of the <code>q</code> parameter (i.e. the full text query).
316    * @throws InvalidUTF8OctetSequenceException
317    *          if the given octets are invalid (see <code>XChars</code> library for more info).
318    */
319   private String decodeQuery(String queryString, boolean isNotParameter) throws InvalidUTF8OctetSequenceException {
320     if (queryString == null || queryString.equals("")) return queryString;
321 
322     StringBuffer searchQuery = new StringBuffer(replacePlus(getSearchQuery(queryString, isNotParameter)));
323     Pattern utf8HexPattern = Pattern.compile("((%([a-fA-F0-9]){2}?)+)"); // Pattern for octet sequences.
324     Matcher utf8Matcher = utf8HexPattern.matcher(searchQuery);
325     int index = 0;
326     while (utf8Matcher.find(index)) {
327       String decodedString = SpecialCharacters.decodeUTF8(utf8Matcher.group(1));
328       index = utf8Matcher.start() + decodedString.length();
329       searchQuery.replace(utf8Matcher.start(), utf8Matcher.end(), decodedString);
330       if (index > searchQuery.length() - 1) break;
331       utf8Matcher.reset(searchQuery);
332     }
333 
334     return searchQuery.toString();
335   }
336 
337   private String getSearchQuery(String queryString, boolean isNotParameter) {
338     if (queryString == null || queryString.equals("")) return "";
339     int searchQueryStart = 0;
340     if (isNotParameter)
341       searchQueryStart = queryString.indexOf("not=");
342     else
343       searchQueryStart = queryString.indexOf("q=");
344 
345     if (searchQueryStart == -1) return "";
346     int searchQueryEnd;
347     String temp = null;
348     if (isNotParameter) {
349       temp = queryString.substring(searchQueryStart + 4);
350     } else {
351       temp = queryString.substring(searchQueryStart + 2);
352     }
353     StringBuffer searchQuery = null;
354     if (temp.indexOf('&') > -1) {
355       searchQueryEnd = temp.indexOf('&');
356       searchQuery = new StringBuffer(temp.substring(0, searchQueryEnd));
357     } else {
358       searchQuery = new StringBuffer(temp.substring(0));
359       searchQueryEnd = searchQueryStart + searchQuery.length();
360     }
361     return searchQuery.toString();
362   }
363 
364   private String addWithinClause(String field) {
365     StringBuffer withinClause = new StringBuffer();
366     withinClause.append("WITHIN ");
367     withinClause.append(field);
368     return withinClause.toString();
369   }
370 
371   private String extendQuery(String excludedWords, boolean transformQuery) throws QueryException {
372     if (transformQuery) excludedWords = transformQuery(excludedWords, null, true);
373 
374     StringBuffer extendedQuery = new StringBuffer();
375     for (StringTokenizer stringTokenizer = new StringTokenizer(excludedWords, ","); stringTokenizer.hasMoreTokens();) {
376       String token = stringTokenizer.nextToken().trim();
377       extendedQuery.append("NOT ");
378       extendedQuery.append(token);
379       extendedQuery.append(" ");
380     }
381 
382     return extendedQuery.toString().trim();
383   }
384 
385 
386 
387 
388   // ------------------- PRIVATE METHODS ------------------------
389 
390   private String escapeUTF8(String query) {
391     // Existing XChars elements will be transformed into escaped UTF-8 strings.
392     SpecialCharacters encoding = (SpecialCharacters) request.getSession().getServletContext().getAttribute("characters");
393     if (query.indexOf("<small>") > -1 || query.indexOf("</small>") > -1 || query.indexOf("<smallsup>") > -1 ||
394             query.indexOf("</smallsup>") > -1 || query.indexOf("</smallsub>") > -1 || query.indexOf("<smallsub>") > -1)
395       return encoding.xml2Display(query, EncodingType.SWISSPROT_CODE);
396     return encoding.xml2Display(query);
397   }
398 
399   /**
400    * Checks the query for various things.
401    * <p/>
402    *
403    * @param query
404    * @param type  The type of query. Can be <code>null</code>.
405    * @return
406    * @throws QueryException
407    */
408   private String checkQuery(String query, QueryType type) throws QueryException {
409     assert query != null && !query.equals("");
410     return transformQuery(query, type, false);
411   }
412 
413   public String transformQuery(String query, QueryType queryType, boolean exclusion) throws QueryException {
414     assert query != null && !query.equals("");
415 
416     // AND, OR and NOT operators will be removed, because manually added operators are not supported.
417     query = escapeBooleanOperators(operators2Uppercase(query));
418 
419     // XChars formmattings will be removed as they do not appear as tokens within the search engine.
420     query = removeFormattings(query);
421 
422     // Queries which are not phrase queries will be handled differently.
423     if (queryType == null || queryType != QueryType.EXACT) {
424       // The within operator will be removed, if the user entered it manually.
425       query = escapeWithinOperator(query);
426 
427       // Check for 'silly' queries.
428       String longQueryWord = getLongQueryWord(query);
429       if (!longQueryWord.equals(""))
430         throw new QueryException("\"" + longQueryWord.substring(0, 80) +
431                 "\"... is too long a word. Try using a shorter word.");
432 
433       if (countQueryWords(query) > 10)
434         throw new QueryException("The search query must not exceed 10 words.");
435 
436       if (!exclusion) {
437         // Use the type parameter to alter the query accordingly.
438         query = typifyQuery(query, queryType);
439       }
440     }
441 
442     // Existing XChars elements will be transformed into escaped UTF-8 strings.
443     query = escapeUTF8(query);
444 
445      query = removeFormattings(query);
446 
447     // Escape remaining tags.
448     if (Pattern.matches(".*?\\<.+?\\>.*?", query))
449       query = query.replaceAll("\\<", "\\\\<").replaceAll("\\>", "\\\\>").replaceAll("\\/", "\\\\/");
450 
451     // Escape/Remove unsupported operators.
452     query = escapeUnsupportedOperators(query);
453 
454     return query;
455   }
456 
457   /**
458    * Removes the within operator if it has been manually entered by the user.
459    *
460    * @param query
461    * @return
462    */
463   private String escapeWithinOperator(String query) {
464     assert query != null;
465     query = " " + query + " ";
466     return query.replaceAll("(\\sWITHIN\\s)", " ").trim();
467   }
468 
469   /**
470    * Handles the special case when the user entered <code>XChars</code> formattings.
471    * <p/>
472    * Currently only <code>&lt;smallsup&gt;</code> and <code>&lt;smallsub&gt;</code> elements represent
473    * <code>XChars</code> formattings.
474    *
475    * @param query The query to be checked.
476    * @return the checked query.
477    */
478   private String removeFormattings(String query) {
479     assert query != null;
480     query = query.replaceAll("\\<smallsu[pb]\\>", "").replaceAll("\\<\\/smallsu[pb]\\>", "");
481     query = query.replaceAll("\\<\\/?small\\>", "");
482     query = query.replaceAll("\\<\\/?sup\\>", "");
483     query = query.replaceAll("\\<\\/?sub\\>", "");
484     query = query.replaceAll("\\<\\/?b\\>", "");
485     query = query.replaceAll("\\<\\/?i\\>", "");
486     query = query.replaceAll("\\<\\/?p\\/?\\>", "");
487     return query.replaceAll("\\<activated\\>", "").replaceAll("\\<\\/activated\\>", "");
488   }
489 
490   /**
491    * Escapes unsupported characters or characters which are reserved characters of the search engine.
492    *
493    * @param query The query to be checked.
494    * @return The checked query.
495    */
496   private String escapeUnsupportedOperators(String query) {
497     assert query != null;
498     StringBuffer checkedQuery = new StringBuffer();
499     char[] chars = query.toCharArray();
500     char previous = '-';
501     char current = '-';
502     char next = '-';
503     for (int iii = 0; iii < chars.length; iii++) {
504       if (iii > 0) previous = current;
505       current = chars[iii];
506       if (iii < chars.length - 1)
507         next = chars[(iii + 1)];
508       else
509         next = '-';
510       switch (checkCharacter(previous, current, next)) {
511         case 0:
512           checkedQuery.append(current);
513           break;
514         case 1: // Escape character.
515           checkedQuery.append("\\");
516           checkedQuery.append(current);
517           break;
518         case 2: // Remove character and previous space.
519           checkedQuery = checkedQuery.deleteCharAt(checkedQuery.length() - 1);
520           break;
521       }
522     }
523 
524     return checkedQuery.toString();
525   }
526 
527   /**
528    * Checks whether the current character is an unsupported operator or reserved character of the search engine.
529    *
530    * @param preceeding The preceeding character.
531    * @param current    The current character.
532    * @param next       The nect character.
533    * @return a code fot the calling method (0 = do not escape, 1 = escape, 2 = remove character).
534    */
535   private int checkCharacter(char preceeding, char current, char next) {
536     char[] unsupportedOperators = {'[', ']', ',', '-', '_', '$', '!', '=', '*', ':', '?', '|', '>', '&', '#', ';', '(', ')', '.'};
537     Arrays.sort(unsupportedOperators);
538     int index = Arrays.binarySearch(unsupportedOperators, current);
539     if (index > -1) {
540       if (preceeding == ' ' && next == ' ') return 2; // remove character (should never happen, becuase it is checked earlier)
541       if (preceeding != '\\') return 1; // escape character
542     }
543     return 0; // do not escape
544   }
545 
546   /**
547    * Inserts logical operators in the query string according to the given type of search.
548    * <p/>
549    * Currently only <code>AND</code> and <code>OR</code> are supported.
550    *
551    * @param query The query to be extended.
552    * @param type  The type of the query.
553    * @return The extended query.
554    */
555   private String typifyQuery(String query, QueryType type) {
556     assert query != null;
557     StringBuffer adjustedQuery = new StringBuffer();
558     int iii = 0;
559     for (StringTokenizer stringTokenizer = new StringTokenizer(query); stringTokenizer.hasMoreTokens();) {
560       String token = stringTokenizer.nextToken();
561       if (iii > 0) {
562         if (type == QueryType.ANY)
563           adjustedQuery.append("OR ");
564         if (type == QueryType.ALL)
565           adjustedQuery.append("AND ");
566       }
567       adjustedQuery.append(token);
568       adjustedQuery.append(" ");
569       iii++;
570     }
571     return adjustedQuery.toString().trim();
572   }
573 
574   /**
575    * Removes supported boolean operators which have been entered manually.
576    * <p/>
577    * All supported boolean operators can only be used by filling the search form correctly (i.e. without entering
578    * them manually).
579    *
580    * @param query The query to be checked.
581    * @return The checked query.
582    */
583   private String escapeBooleanOperators(String query) {
584     assert query != null;
585     query = " " + query + " ";
586     query = query.replaceAll("(\\sAND\\s)", " {AND} ");
587     query = query.replaceAll("(\\sOR\\s)", " {OR} ");
588     query = query.replaceAll("(\\sNOT\\s)", " {NOT} ");
589     query = query.replaceAll("\\s\\&\\s", " {&} ");
590     query = query.replaceAll("\\s\\|\\s", " {|} ");
591     query = query.replaceAll("\\s\\~\\s", " {~} ");
592      if (query.indexOf(" BT ") != - 1)
593          query = query.replaceAll(" BT ", " {BT} ");
594       if (query.indexOf(" ABOUT ") != - 1)
595          query = query.replaceAll(" ABOUT ", " {ABOUT} ");
596       if (query.indexOf(" ACCUM ") != - 1)
597          query = query.replaceAll(" ACCUM ", " {ACCUM} ");
598       if (query.indexOf(" BTG ") != - 1)
599          query = query.replaceAll(" BTG ", " {BTG} ");
600       if (query.indexOf(" BTI ") != - 1)
601          query = query.replaceAll(" BTI ", " {BTI} ");
602       if (query.indexOf(" BTP ") != - 1)
603          query = query.replaceAll(" BTP ", " {BTP} ");
604       if (query.indexOf(" FUZZY ") != - 1)
605          query = query.replaceAll(" FUZZY ", " {FUZZY} ");
606       if (query.indexOf(" HASPATH ") != - 1)
607          query = query.replaceAll(" HASPATH ", " {HASPATH} ");
608       if (query.indexOf(" INPATH ") != - 1)
609          query = query.replaceAll(" INPATH ", " {INPATH} ");
610       if (query.indexOf(" MINUS ") != - 1)
611          query = query.replaceAll(" MINUS ", " {MINUS} ");
612       if (query.indexOf(" NEAR ") != - 1)
613          query = query.replaceAll(" NEAR ", " {NEAR} ");
614       if (query.indexOf(" NT ") != - 1)
615          query = query.replaceAll(" NT ", " {NT} ");
616       if (query.indexOf(" NTG ") != - 1)
617          query = query.replaceAll(" NTG ", " {NTG} ");
618       if (query.indexOf(" NTI ") != - 1)
619          query = query.replaceAll(" NTI ", " {NTI} ");
620       if (query.indexOf(" NTP ") != - 1)
621          query = query.replaceAll(" NTP ", " {NTP} ");
622       if (query.indexOf(" PT ") != - 1)
623          query = query.replaceAll(" PT ", " {PT} ");
624       if (query.indexOf(" RT ") != - 1)
625          query = query.replaceAll(" RT ", " {RT} ");
626       if (query.indexOf(" SQE ") != - 1)
627          query = query.replaceAll(" SQE ", " {SQE} ");
628       if (query.indexOf(" SYN ") != - 1)
629          query = query.replaceAll(" SYN ", " {SYN} ");
630       if (query.indexOf(" TR ") != - 1)
631          query = query.replaceAll(" TR ", " {TR} ");
632       if (query.indexOf(" TRSYN ") != - 1)
633          query = query.replaceAll(" TRSYN ", " {TRSYN} ");
634       if (query.indexOf(" TT ") != - 1)
635          query = query.replaceAll(" TT ", " {TT} ");
636     return query.trim();
637   }
638 
639   /**
640    * Checks if a query word is longer than 200 characters.
641    *
642    * @param query The query string.
643    * @return The word which length is greater than 200 characters.
644    */
645   private String getLongQueryWord(String query) {
646     assert query != null;
647     for (StringTokenizer stringTokenizer = new StringTokenizer(query); stringTokenizer.hasMoreTokens();) {
648       String word = stringTokenizer.nextToken();
649       if (word.length() > 200) return word;
650     }
651     return "";
652   }
653 
654   /**
655    * Counts the number of query words.
656    *
657    * @param detaggedQuery The query string.
658    * @return The number of query words (w/o operators).
659    */
660   private int countQueryWords(String detaggedQuery) {
661     int count = 0;
662     for (StringTokenizer stringTokenizer = new StringTokenizer(detaggedQuery); stringTokenizer.hasMoreTokens(); stringTokenizer.nextToken()) {
663       count++;
664     }
665     return count;
666   }
667 
668   /**
669    * Checks if the given string is a supported operator.
670    *
671    * @param word The search to be checked.
672    * @return <code>true</code>, if the given string is a supported operator.
673    */
674   private boolean isOperator(String word) {
675     assert word != null;
676     if (word.trim().toUpperCase().equals("AND") ||
677             word.toUpperCase().equals("OR") ||
678             word.toUpperCase().equals("NOT") ||
679             word.toUpperCase().equals("WITHIN"))
680       return true;
681 
682     return false;
683   }
684 
685   /**
686    * Writes all supported operators using uppercase letters.
687    *
688    * @param query The query to be checked.
689    * @return The formatted query.
690    */
691   private String operators2Uppercase(String query) {
692     assert query != null && !query.equals("");
693     StringBuffer sb = new StringBuffer();
694     for (StringTokenizer stringTokenizer = new StringTokenizer(query); stringTokenizer.hasMoreTokens();) {
695       String word = stringTokenizer.nextToken();
696       if (isOperator(word)) {
697         sb.append(word.toUpperCase() + " ");
698       } else {
699         sb.append(word + " ");
700       }
701     }
702 
703     return sb.toString().trim();
704   }
705 
706   /**
707    * Returns a result group to be shown in the browser.
708    *
709    * @param result The result.
710    * @param start  The start index of the group.
711    * @param end    The end index og the group.
712    * @return the selected group.
713    */
714   private List<Result> getGroup(List<Result> result, int start, int end) {
715     assert result != null;
716     List<Result> group = new ArrayList<Result>();
717 
718     if (end > result.size()) {
719       for (int iii = start; iii < result.size(); iii++) {
720         group.add(result.get(iii));
721       }
722     } else {
723       for (int iii = start; iii < end; iii++) {
724         group.add(result.get(iii));
725       }
726     }
727 
728     return group;
729   }
730 
731   private static class QueryType {
732     private String type;
733     public static final QueryType ANY = new QueryType("ANY");
734     public static final QueryType ALL = new QueryType("ALL");
735     public static final QueryType EXACT = new QueryType("EXACT");
736 
737     /**
738      * Returns the corresponding instance of the given query type.
739      * <p/>
740      * If the query type does not match any type an exception is thrown.
741      *
742      * @param type The query type.
743      * @return the class constant corresponding to the given type.
744      * @throws IllegalArgumentException if the type is invalid.
745      */
746     public static QueryType valueOf(String type) {
747       if (type == null || type.equals("") || type.toUpperCase().equals("UNDEF")) return EXACT;
748       type = type.toUpperCase();
749       if (type.equals(ANY.toString())) return ANY;
750       if (type.equals(ALL.toString())) return ALL;
751       if (type.equals(EXACT.toString())) return EXACT;
752       throw new IllegalArgumentException();
753     }
754 
755     /**
756      * Object cannot be created outside this class.
757      *
758      * @param type The query type.
759      */
760     private QueryType(String type) {
761       this.type = type;
762     }
763 
764     /**
765      * Standard equals method.
766      *
767      * @param o Object to be compared to this one.
768      * @return <code>true</code> if the objects are equal.
769      */
770     public boolean equals(Object o) {
771       if (this == o) return true;
772       if (!(o instanceof QueryType)) return false;
773 
774       final QueryType queryType = (QueryType) o;
775 
776       if (type != null ? !type.equals(queryType.type) : queryType.type != null) return false;
777 
778       return true;
779     }
780 
781     /**
782      * Returns the hash code of this object.
783      *
784      * @return the hash code of this object.
785      */
786     public int hashCode() {
787       return (type != null ? type.hashCode() : 0);
788     }
789 
790     /**
791      * Returns the query type's query.
792      *
793      * @return the query type's query.
794      */
795     public String toString() {
796       return type;
797     }
798   }
799 
800   public class Result {
801       private String id;
802       private EnzymeCommissionNumber ec;
803       private String commonName;
804       private String status;
805       private int score;
806       private SortedMap<Integer, String> xmlFragments;
807       private void addText(String text, int order){
808           if (xmlFragments == null) xmlFragments = new TreeMap<Integer, String>();
809           xmlFragments.put(new Integer(order), text);
810       }
811       private void addScore(int i) {
812           score += i;
813       }
814       private String getText(){
815           StringBuffer wholeXml = new StringBuffer();
816           for (Iterator<String> it = xmlFragments.values().iterator(); it.hasNext();){
817               wholeXml.append(it.next());
818           }
819           return wholeXml.toString();
820       }
821       public EnzymeCommissionNumber getEc() {
822           return ec;
823       }
824       public String getId() {
825           return id;
826       }
827       public String getCommonName() {
828           return commonName;
829       }
830       public String getStatus() {
831           return status;
832       }
833       public boolean isActive(){
834           return getText().indexOf("<active></active>") == -1;
835       }
836 
837   }
838 }