diff --git a/src/main/java/ru/lanwen/verbalregex/VerbalExpression.java b/src/main/java/ru/lanwen/verbalregex/VerbalExpression.java index d68448e..9271168 100644 --- a/src/main/java/ru/lanwen/verbalregex/VerbalExpression.java +++ b/src/main/java/ru/lanwen/verbalregex/VerbalExpression.java @@ -52,13 +52,11 @@ public Builder endOfLine() { } public Builder then(String pValue) { - this.add("(?:" + sanitize(pValue) + ")"); - return this; + return this.add("(?:" + sanitize(pValue) + ")"); } public Builder find(String value) { - this.then(value); - return this; + return this.then(value); } public Builder maybe(final String pValue) { @@ -66,53 +64,104 @@ public Builder maybe(final String pValue) { } public Builder anything() { - this.add("(?:.*)"); - return this; + return this.add("(?:.*)"); } public Builder anythingButNot(final String pValue) { - this.add("(?:[^" + sanitize(pValue) + "]*)"); - return this; + return this.add("(?:[^" + sanitize(pValue) + "]*)"); } public Builder something() { - this.add("(?:.+)"); - return this; + return this.add("(?:.+)"); } public Builder somethingButNot(final String pValue) { - this.add("(?:[^" + sanitize(pValue) + "]+)"); - return this; + return this.add("(?:[^" + sanitize(pValue) + "]+)"); } public Builder lineBreak() { - this.add("(?:\\n|(\\r\\n))"); - return this; + return this.add("(?:\\n|(\\r\\n))"); } public Builder br() { - this.lineBreak(); - return this; + return this.lineBreak(); } + /** + * @return tab character ('\u0009') + */ public Builder tab() { - this.add("\\t"); - return this; + return this.add("(?:\\t)"); } + /** + * @return word, same as [a-zA-Z_0-9]+ + */ public Builder word() { - this.add("\\w+"); - return this; + return this.add("(?:\\w+)"); + } + + + /* + --- Predefined character classes + */ + + /** + * @return word character, same as [a-zA-Z_0-9] + */ + public Builder wordChar() { + return this.add("(?:\\w)"); } + + /** + * @return non-word character: [^\w] + */ + public Builder nonWordChar() { + return this.add("(?:\\W)"); + } + + /** + * @return non-digit: [^0-9] + */ + public Builder nonDigit() { + return this.add("(?:\\D)"); + } + + /** + * @return same as [0-9] + */ + public Builder digit() { + return this.add("(?:\\d)"); + } + + /** + * @return whitespace character, same as [ \t\n\x0B\f\r] + */ + public Builder space() { + return this.add("(?:\\s)"); + } + + /** + * @return non-whitespace character: [^\s] + */ + public Builder nonSpace() { + return this.add("(?:\\S)"); + } + + + /* + --- / end of predefined character classes + */ + + public Builder anyOf(final String pValue) { this.add("[" + sanitize(pValue) + "]"); return this; } public Builder any(final String value) { - this.anyOf(value); - return this; + return this.anyOf(value); } public Builder range(String... pArgs) { diff --git a/src/test/java/ru/lanwen/verbalregex/PredefinedCharClassesTest.java b/src/test/java/ru/lanwen/verbalregex/PredefinedCharClassesTest.java new file mode 100644 index 0000000..c5a9efc --- /dev/null +++ b/src/test/java/ru/lanwen/verbalregex/PredefinedCharClassesTest.java @@ -0,0 +1,88 @@ +package ru.lanwen.verbalregex; + +import org.junit.Test; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.not; +import static org.hamcrest.MatcherAssert.assertThat; +import static ru.lanwen.verbalregex.VerbalExpression.regex; + +/** + * User: lanwen + * Date: 13.05.14 + * Time: 16:26 + */ +public class PredefinedCharClassesTest { + + public static final String LETTERS_NO_DIGITS = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM_"; + public static final String DIGITS = "0123456789"; + public static final String NON_LETTERS = ";'[]{}|?/"; + public static final String SPACE = " \t\n\f\r"; + + @Test + public void testWordChar() throws Exception { + VerbalExpression regex = regex().wordChar().build(); + + assertThat("Not matches on letters", regex.test(LETTERS_NO_DIGITS + DIGITS), is(true)); + assertThat("matches on non letters", regex.test(NON_LETTERS + SPACE), is(false)); + assertThat("Extracts wrong word chars", + regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), equalTo(LETTERS_NO_DIGITS + DIGITS)); + + } + + @Test + public void testNonWordChar() throws Exception { + VerbalExpression regex = regex().nonWordChar().build(); + + assertThat("matches on letters", regex.test(LETTERS_NO_DIGITS + DIGITS), is(false)); + assertThat("Not matches on non letters", regex.test(NON_LETTERS + SPACE), is(true)); + assertThat("Extracts wrong chars", + regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), equalTo(NON_LETTERS + SPACE)); + + } + + @Test + public void testSpace() throws Exception { + VerbalExpression regex = regex().space().build(); + + assertThat("matches on letters", regex.test(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS), is(false)); + assertThat("Not matches on space", regex.test(SPACE), is(true)); + assertThat("Extracts wrong chars", + regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), equalTo(SPACE)); + + } + + @Test + public void testNonSpace() throws Exception { + VerbalExpression regex = regex().nonSpace().build(); + + assertThat("Not matches on non space", regex.test(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS), is(true)); + assertThat("matches on space", regex.test(SPACE), is(false)); + assertThat("Extracts wrong chars", + regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), not(SPACE)); + + } + + @Test + public void testDigit() throws Exception { + VerbalExpression regex = regex().digit().build(); + + assertThat("matches on letters", regex.test(LETTERS_NO_DIGITS + SPACE + NON_LETTERS), is(false)); + assertThat("Not matches on digits", regex.test(DIGITS), is(true)); + assertThat("Extracts wrong chars", + regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), is(DIGITS)); + + } + + @Test + public void testNonDigit() throws Exception { + VerbalExpression regex = regex().nonDigit().build(); + + assertThat("Not matches on letters", regex.test(LETTERS_NO_DIGITS + SPACE + NON_LETTERS), is(true)); + assertThat("matches on digits", regex.test(DIGITS), is(false)); + assertThat("Extracts wrong chars", + regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), not(DIGITS)); + + } +}