1 package uk.ac.ebi.intenz.tools.sib.writer;
2
3 import java.util.Iterator;
4 import java.util.SortedSet;
5 import java.util.TreeSet;
6 import java.util.regex.Matcher;
7 import java.util.regex.Pattern;
8
9
10
11
12
13
14
15 public class LineWrapperFactory {
16
17
18
19
20 private static final String[] lineBreakPatterns = {
21 "\\=\\s",
22 "[^\\(\\sEC]\\s",
23 "\\-\\d+\\-[^\\>\\|\\)]",
24 "[\\]\\)]\\-[^\\>\\|\\)]",
25 "\\-\\w{1,1}\\-[^\\>\\|\\)]",
26 "\\-\\d(\\,\\d)+\\-[^\\>\\|\\)]",
27 "\\w{3,}\\-[^\\>\\|\\)]",
28 "\\,\\d+\\-[^\\>\\|\\)]"
29 };
30
31
32
33
34
35
36
37 private static final String[] enforcedLineBreakPatterns = {
38 "\\:?((?<!and)\\s\\(\\d+\\)\\s+)(?!.*?and\\s\\(\\d+\\).*?).+?\\."
39 };
40
41
42
43
44
45
46
47
48
49
50 public static LineWrapper create(String wholeText, LineType lineType) {
51 if (wholeText == null || lineType == null) throw new NullPointerException();
52
53 if (!wholeText.matches(".*?[\\:\\.]\\s+?\\(\\d+\\)\\s+.+?")) {
54 if (lineType == LineType.CA && wholeText.matches(".+?\\s\\+\\s.+?\\s\\=\\s.+?\\s\\+\\s.+?")) {
55
56
57 Pattern pattern = Pattern.compile("(.+?)\\s\\+\\s(.+?)\\s\\=\\s(.+?)\\s\\+\\s(.+)");
58 Matcher matcher = pattern.matcher(wholeText);
59 if (matcher.find()) {
60 String compound1 = matcher.group(1);
61 String compound2 = matcher.group(2);
62 String compound3 = matcher.group(3);
63 String compound4 = matcher.group(4);
64 if (countWords(compound1) > 3 || countWords(compound2) > 3 ||
65 countWords(compound3) > 3 || countWords(compound4) > 3)
66 return new DefaultLineWrapper();
67 }
68
69 return new ReactionWrapper();
70 }
71 return new DefaultLineWrapper();
72 } else
73 return new OrderedListLineWrapper();
74 }
75
76 private static class DefaultLineWrapper implements LineWrapper {
77 private static final int MIN_LENGTH_DIVISOR = 4;
78 private static final int MIN_LENGTH_NUMBER = 2;
79
80 public int findPosition(String text, int netLineWidth) throws EnzymeFlatFileWriteException {
81 SortedSet possibleLineBreakPositions = getPossibleLineBreakPositions(text, netLineWidth);
82 if (possibleLineBreakPositions == null) throw new EnzymeFlatFileWriteException("No line break positions found");
83 return getNearestLineBreakPosition(possibleLineBreakPositions, netLineWidth, text);
84 }
85
86
87
88
89
90
91
92
93
94 protected SortedSet getPossibleLineBreakPositions(String text, int lineWidth) {
95 assert text != null : text;
96 SortedSet possibleLineBreakPositions = null;
97
98 Pattern pattern;
99 Matcher matcher;
100
101 for (int iii = 0; iii < lineBreakPatterns.length; iii++) {
102 String lineBreakPattern = lineBreakPatterns[iii];
103
104 pattern = Pattern.compile(lineBreakPattern);
105 matcher = pattern.matcher(text);
106 while (matcher.find()) {
107 int position = matcher.end();
108
109 if (lineBreakPattern.endsWith("\\s") || lineBreakPattern.endsWith("[^\\>\\|\\)]")) position--;
110 if (possibleLineBreakPositions == null){
111 possibleLineBreakPositions = new TreeSet();
112 }
113 possibleLineBreakPositions.add(new Integer(position));
114 }
115 }
116
117
118
119
120
121
122 return possibleLineBreakPositions;
123 }
124
125
126
127
128
129
130
131
132 protected int getNearestLineBreakPosition(SortedSet possibleLineBreakPositions, int netLineWidth, String text) {
133 assert possibleLineBreakPositions != null : possibleLineBreakPositions;
134 assert netLineWidth > -1 : netLineWidth;
135
136 int previousLineBreakPosition = 0;
137 int possibleLineBreakPosition = 0;
138 int lastSpace = 0;
139
140 Iterator it = possibleLineBreakPositions.iterator();
141 while (it.hasNext()) {
142 int position = ((Integer) it.next()).intValue();
143 if (position <= netLineWidth) {
144 previousLineBreakPosition = possibleLineBreakPosition;
145 if (text.charAt(previousLineBreakPosition) == ' ') lastSpace = previousLineBreakPosition;
146 possibleLineBreakPosition = position;
147 }
148 }
149
150
151 if (previousLineBreakPosition > 0) {
152
153 if ((possibleLineBreakPosition - previousLineBreakPosition < 5 &&
154 (text.charAt(previousLineBreakPosition - 1) == '=' ||
155 text.charAt(previousLineBreakPosition - 1) == '+' ||
156 text.charAt(previousLineBreakPosition - 1) == ','
157
158
159 ))) {
160 return previousLineBreakPosition;
161 }
162 }
163
164
165
166
167 if(possibleLineBreakPosition==0)
168 if(possibleLineBreakPositions.first()!=null)
169 possibleLineBreakPosition = ((Integer) possibleLineBreakPositions.first()).intValue();
170
171 return possibleLineBreakPosition;
172 }
173
174
175 private boolean isDashedWordSpecialCase(int index, int lastSpace, String text) {
176 String endOfLine = text.substring(lastSpace, index).trim();
177 String wrappedWord = "";
178 if (text.indexOf(" ", lastSpace + 1) > -1) {
179 wrappedWord = text.substring(lastSpace, text.indexOf(" ", lastSpace + 1)).trim();
180 } else {
181 wrappedWord = text.substring(lastSpace);
182 }
183
184
185
186
187
188
189
190
191
192
193
194
195 if (endOfLine.matches("\\w+?\\-")) {
196 if (wrappedWord.indexOf("-") == wrappedWord.lastIndexOf("-")) return false;
197 if (endOfLine.length() < (wrappedWord.length() / MIN_LENGTH_DIVISOR)) return true;
198 return false;
199 }
200
201 return false;
202 }
203 }
204
205 private static class OrderedListLineWrapper extends DefaultLineWrapper {
206 protected SortedSet getPossibleLineBreakPositions(String text, int lineWidth) {
207 SortedSet possibleLineBreakPositions = null;
208 Pattern pattern;
209 Matcher matcher;
210 for (int iii = 0; iii < enforcedLineBreakPatterns.length; iii++) {
211 if (iii == 0) possibleLineBreakPositions = new TreeSet();
212 String lineBreakPattern = enforcedLineBreakPatterns[iii];
213
214 pattern = Pattern.compile(lineBreakPattern);
215 matcher = pattern.matcher(text);
216 if (matcher.find()) {
217 int position = text.indexOf(matcher.group(1)) + 1;
218 if (position > lineWidth) break;
219 possibleLineBreakPositions.add(new Integer(position));
220 return possibleLineBreakPositions;
221 }
222 }
223
224 return super.getPossibleLineBreakPositions(text, lineWidth);
225 }
226 }
227
228 private static class ReactionWrapper extends DefaultLineWrapper {
229
230 protected SortedSet getPossibleLineBreakPositions(String text, int lineWidth) {
231 SortedSet possibleLineBreakPositions = super.getPossibleLineBreakPositions(text, lineWidth);
232 SortedSet cleanedLineBreakPositions = new TreeSet();
233 for (Iterator it = possibleLineBreakPositions.iterator(); it.hasNext();) {
234 int position = ((Integer) it.next()).intValue();
235 if (text.charAt(position) == ' ') {
236 char before = text.charAt(position - 1);
237 char after = text.charAt(position + 1);
238
239 if (before != '+' && before != '=')
240 continue;
241 }
242 cleanedLineBreakPositions.add(new Integer(position));
243 }
244
245 return cleanedLineBreakPositions;
246 }
247 }
248
249 private static int countWords(String text) {
250 Pattern pattern = Pattern.compile("\\b");
251 Matcher matcher = pattern.matcher(text);
252 int count = 0;
253 while (matcher.find()) count++;
254 return count / 2;
255 }
256
257 }