1 package uk.ac.ebi.intenz.tools.sib.writer;
2
3 import java.util.ArrayList;
4 import java.util.List;
5 import java.util.regex.Matcher;
6 import java.util.regex.Pattern;
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 public class CC_LineFormatter extends DefaultLineFormatter {
23
24
25
26
27
28
29
30
31
32
33 public String formatLines(String text, LineType lineType) throws EnzymeFlatFileWriteException {
34 if (text == null || lineType == null) throw new NullPointerException();
35 if (lineType != LineType.CC) throw new IllegalArgumentException();
36
37
38 List<String> commentSentences = getCommentSentences(text);
39 StringBuilder CCContent = new StringBuilder();
40 boolean containsOrderedListWithIndent = text.matches(".*?\\:\\s+?\\(\\d+\\)\\s+.+?");
41 for (Object commentSentence : commentSentences) {
42 String sentence = (String) commentSentence;
43 CCContent.append(wrapCCLine(sentence, containsOrderedListWithIndent));
44 }
45
46
47 String ccLines = CCContent.toString();
48 if (CCContent.indexOf("/.") > -1)
49 return CCContent.replace(CCContent.indexOf("/."), CCContent.indexOf("/.") + 2, "/").toString();
50
51 return ccLines;
52 }
53
54
55
56
57
58
59
60
61
62
63
64 private String wrapCCLine(String text, boolean addIndent) throws EnzymeFlatFileWriteException {
65 assert text != null : text;
66
67 String lineStart = "CC ";
68 int netLineWidth = LINEWIDTH - (lineStart.length() + 4);
69 StringBuilder wrappedText = new StringBuilder();
70 if (text.length() <= netLineWidth) {
71 wrappedText.append(lineStart);
72 wrappedText.append("-!- ");
73 wrappedText.append(text);
74 wrappedText.append("\n");
75 return wrappedText.toString();
76 }
77
78 StringBuilder restText = new StringBuilder(text);
79 wrappedText.append(lineStart);
80 wrappedText.append("-!- ");
81
82 boolean orderedListIndicatorFound = false;
83 boolean foundBeginningOfItem = false;
84 String indent = "";
85 LineWrapper lineWrapPositioner = LineWrapperFactory.create(text, LineType.CC);
86 while (restText.toString().trim().length() > netLineWidth) {
87 int position;
88 if (restText.charAt(0) == ' ') restText.deleteCharAt(0);
89
90 if (orderedListIndicatorFound) {
91 if (restText.toString().trim().matches("\\(\\d+\\).+?")) {
92 position = lineWrapPositioner.findPosition(restText.toString().trim(), netLineWidth);
93 foundBeginningOfItem = true;
94 } else {
95 position = lineWrapPositioner.findPosition(restText.toString().trim(), netLineWidth - indent.length());
96 foundBeginningOfItem = false;
97 }
98 } else {
99 position = lineWrapPositioner.findPosition(restText.toString().trim(), netLineWidth);
100 }
101
102
103 String line = restText.substring(0, position).trim();
104
105
106 if (addIndent && line.trim().endsWith(":")) orderedListIndicatorFound = true;
107 if (foundBeginningOfItem)
108 indent = getIndent(line);
109
110 restText.delete(0, position);
111
112 if (orderedListIndicatorFound && !foundBeginningOfItem) {
113 wrappedText.append(indent);
114 }
115
116 wrappedText.append(line);
117 wrappedText.append("\n");
118 wrappedText.append(lineStart);
119 wrappedText.append(" ");
120 }
121
122 String tail = restText.toString().trim();
123
124
125
126 wrappedText.append(tail);
127 wrappedText.append("\n");
128
129 return wrappedText.toString();
130 }
131
132
133
134
135
136
137
138 public List<String> getCommentSentences(String commentText) {
139 assert commentText != null : commentText;
140
141 List<String> sentences = new ArrayList<String>();
142 final String sentenceDelimiterPattern = "(.*?\\.\\s|.*?http\\:\\/\\/\\S+?\\/\\s)";
143
144
145
146
147 final String[][] nonSentenceDelimiters = {
148 {"C. ", "\\p{Lower}.*?"},
149 {"E. ", ".*?"},
150 {"L. ", "\\(.*"},
151 {"(cf. ", ".*?"},
152 {"Cf. ", ".*?"},
153 {"cf. ", ".*?"},
154 {"i.e. ", ".*?"},
155 {"i.e., ", ".*?"},
156 {"i.e.", ".*?"},
157 {"i.e.,", ".*?"},
158 {"e.g. ", ".*?"},
159 {"e.g., ", ".*?"},
160 {"e.g.", ".*?"},
161 {"e.g.,", ".*?"},
162 {"etc. ", ".*?"},
163 {"etc., ", ".*?"},
164 {"etc.", ".*?"},
165 {"etc.,", ".*?"},
166 {"sp. ", "(\\p{Lower}|\\p{Upper}+[ -]?\\p{Digit}+|OxB-1|YAA|PCC|A1-3|No\\.|\\d+|\\(|WS).*?"},
167 {"sp., ", ".*?"},
168 {"sp.", ".*?"},
169 {"sp.,", ".*?"},
170 {"spp. ", ".*?"},
171 {".) ", ".*?"},
172 {".)", ".*?"},
173 {".( ", ".*?"},
174 {".(", ".*?"},
175 {" var. ", ".*?"},
176 {"No. ", ".*?"},
177 {"bv. ", ".*?"},
178 { " pv. ", ".*?" }
179 };
180
181 Pattern pattern = Pattern.compile(sentenceDelimiterPattern);
182 Matcher matcher = pattern.matcher(commentText);
183
184 boolean concat = false;
185 StringBuffer sentence = null;
186 int end = 0;
187 boolean found = matcher.find();
188 while (found) {
189 String substring = matcher.group(1);
190 end = matcher.end(1);
191
192
193 if (concat) {
194 sentence.append(substring);
195 concat = false;
196 } else {
197 sentence = new StringBuffer(substring);
198 }
199
200
201 for (String[] nonSentenceDelimiter1 : nonSentenceDelimiters) {
202 String nonSentenceDelimiter = nonSentenceDelimiter1[0];
203 if (substring.endsWith(nonSentenceDelimiter)) {
204 String restOfSentence = commentText.substring(matcher.end());
205 if (Pattern.matches(nonSentenceDelimiter1[1], restOfSentence)) {
206 concat = true;
207 break;
208 }
209 }
210 }
211
212 found = matcher.find();
213 if (concat && found) continue;
214
215 if (!concat)
216 sentences.add(sentence.toString().trim());
217 else
218 sentences.add(sentence.toString());
219 }
220
221 if (concat) {
222 StringBuilder temp =
223 new StringBuilder(sentences.get(sentences.size() - 1));
224 temp.append(commentText.substring(end));
225 sentences.remove(sentences.size() - 1);
226 sentences.add(temp.toString().trim());
227 } else {
228 sentences.add(commentText.substring(end).trim());
229 }
230
231 if (commentText.matches(".*?\\:\\s\\(\\d+\\)\\s+.*?")) sentences = mergeListSentences(sentences);
232
233 return sentences;
234 }
235
236
237
238
239
240
241
242 private List<String> mergeListSentences(List<String> sentences) {
243 List<String> mergedSentences = new ArrayList<String>();
244
245 int count = 0;
246 for (int iii = 0; iii < sentences.size(); iii++) {
247 String sentence = sentences.get(iii);
248 if (sentence.matches("^\\(\\d+\\).*?")) {
249 StringBuilder extendedSentence = new StringBuilder(
250 mergedSentences.remove(iii - (1 + count)));
251 extendedSentence.append(" ");
252 extendedSentence.append(sentence);
253 mergedSentences.add(iii - (1 + count), extendedSentence.toString());
254 count++;
255 } else
256 mergedSentences.add(sentence);
257 }
258
259 return mergedSentences;
260 }
261
262
263
264
265
266
267
268 private String getIndent(String line) {
269 assert line != null : line;
270
271 StringBuilder indent = new StringBuilder();
272
273
274
275
276
277
278 return indent.toString();
279 }
280
281 }