[jruby-joni] 208/223: Support for \X

Hideki Yamane henrich at moszumanska.debian.org
Mon Nov 16 11:22:14 UTC 2015


This is an automated email from the git hooks/post-receive script.

henrich pushed a commit to branch debian/sid
in repository jruby-joni.

commit 061676e4a115849e6ca1cc8abbb9dd30d5b57fa8
Author: Marcin Mielzynski <lopx at gazeta.pl>
Date:   Fri Apr 10 01:17:53 2015 +0200

    Support for \X
---
 src/org/joni/Lexer.java                            |  3 ++
 src/org/joni/Parser.java                           | 30 +++++++++++
 src/org/joni/Syntax.java                           |  5 +-
 src/org/joni/constants/TokenType.java              |  1 +
 test/org/joni/test/TestA.java                      |  2 +
 .../org/joni/test/TestAU.java                      | 61 ++++++++++++----------
 test/org/joni/test/TestJoni.java                   |  3 ++
 test/org/joni/test/TestU8.java                     |  2 +
 8 files changed, 76 insertions(+), 31 deletions(-)

diff --git a/src/org/joni/Lexer.java b/src/org/joni/Lexer.java
index 2287e77..f447059 100644
--- a/src/org/joni/Lexer.java
+++ b/src/org/joni/Lexer.java
@@ -1125,6 +1125,9 @@ class Lexer extends ScannerSupport {
                 case 'R':
                     if (syntax.op2EscCapitalRLinebreak()) token.type = TokenType.LINEBREAK;
                     break;
+                case 'X':
+                    if (syntax.op2EscCapitalXExtendedGraphemeCluster()) token.type = TokenType.EXTENDED_GRAPHEME_CLUSTER;
+                    break;
                 default:
                     unfetch();
                     int num = fetchEscapedValue();
diff --git a/src/org/joni/Parser.java b/src/org/joni/Parser.java
index 2f01a1c..3891416 100644
--- a/src/org/joni/Parser.java
+++ b/src/org/joni/Parser.java
@@ -27,6 +27,8 @@ import static org.joni.Option.isIgnoreCase;
 import org.jcodings.Ptr;
 import org.jcodings.constants.CharacterType;
 import org.jcodings.constants.PosixBracket;
+import org.jcodings.specific.ASCIIEncoding;
+import org.jcodings.unicode.UnicodeEncoding;
 import org.joni.ast.AnchorNode;
 import org.joni.ast.AnyCharNode;
 import org.joni.ast.BackRefNode;
@@ -759,6 +761,34 @@ class Parser extends Lexer {
             node = en;
             break;
 
+        case EXTENDED_GRAPHEME_CLUSTER: /* \X */
+            if (Config.USE_UNICODE_PROPERTIES) {
+                if (enc instanceof UnicodeEncoding) {
+                    int ctype = enc.propertyNameToCType(new byte[]{(byte)'M'}, 0, 1);
+                    if (ctype > 0) {
+                        CClassNode cc1 = new CClassNode(); /* \P{M} */
+                        cc1.addCType(ctype, false, env, this);
+                        cc1.setNot();
+                        CClassNode cc2 = new CClassNode(); /* \p{M}* */
+                        cc2.addCType(ctype, false, env, this); /* populate cc2 itself; it is the quantifier target below */
+                        QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
+                        qn.setTarget(cc2);
+                        /* \P{M}\p{M}* : cc1 followed by the repeated cc2 */
+                        ConsAltNode list2 = ConsAltNode.newListNode(qn, null);
+                        ConsAltNode list1 = ConsAltNode.newListNode(cc1, list2);
+                        EncloseNode en2 = new EncloseNode(EncloseType.STOP_BACKTRACK); /* (?>...) */
+                        en2.setTarget(list1);
+                        node = en2;
+                    }
+                }
+            }
+            if (node == null) { /* Unicode path above produced no node: fall back to an any-char node */
+                AnyCharNode np1 = new AnyCharNode();
+                EncloseNode on = new EncloseNode(bsOnOff(env.option, Option.MULTILINE, false), 0);
+                on.setTarget(np1);
+                node = on; /* return the option wrapper around np1, not the bare np1 */
+            }
+            break;
         case STRING:
             return parseExpTkByte(group); // tk_byte:
 
diff --git a/src/org/joni/Syntax.java b/src/org/joni/Syntax.java
index baea7c6..47f069d 100644
--- a/src/org/joni/Syntax.java
+++ b/src/org/joni/Syntax.java
@@ -420,8 +420,9 @@ public final class Syntax implements SyntaxProperties{
         OP2_CCLASS_SET_OP | OP2_ESC_CAPITAL_C_BAR_CONTROL |
         OP2_ESC_CAPITAL_M_BAR_META | OP2_ESC_V_VTAB |
         OP2_ESC_H_XDIGIT |
-        OP2_ESC_CAPITAL_R_LINEBREAK |
-        OP2_QMARK_LPAREN_CONDITION),
+        OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
+        OP2_QMARK_LPAREN_CONDITION |
+        OP2_ESC_CAPITAL_R_LINEBREAK),
 
         ( GNU_REGEX_BV |
         ALLOW_INTERVAL_LOW_ABBREV |
diff --git a/src/org/joni/constants/TokenType.java b/src/org/joni/constants/TokenType.java
index f9a18bd..3dc8386 100644
--- a/src/org/joni/constants/TokenType.java
+++ b/src/org/joni/constants/TokenType.java
@@ -40,6 +40,7 @@ public enum TokenType {
       QUOTE_OPEN,
       CHAR_PROPERTY,    /* \p{...}, \P{...} */
       LINEBREAK,
+      EXTENDED_GRAPHEME_CLUSTER,
       /* in cc */
       CC_CLOSE,
       CC_RANGE,
diff --git a/test/org/joni/test/TestA.java b/test/org/joni/test/TestA.java
index 7b5f689..de5e3b5 100644
--- a/test/org/joni/test/TestA.java
+++ b/test/org/joni/test/TestA.java
@@ -526,6 +526,8 @@ public class TestA extends Test {
         x2s("\\R", "\n", 0, 1);
         x2s("\\R", "\r", 0, 1);
         x2s("\\R{3}", "\r\r\n\n", 0, 4);
+
+        x2s("\\X{5}", "あいab\n", 0, 5);
     }
 
     public static void main(String[] args) throws Throwable{
diff --git a/src/org/joni/constants/TokenType.java b/test/org/joni/test/TestAU.java
similarity index 62%
copy from src/org/joni/constants/TokenType.java
copy to test/org/joni/test/TestAU.java
index f9a18bd..0d37a88 100644
--- a/src/org/joni/constants/TokenType.java
+++ b/test/org/joni/test/TestAU.java
@@ -17,33 +17,36 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-package org.joni.constants;
-
-public enum TokenType {
-      EOT,            /* end of token */
-      RAW_BYTE,
-      CHAR,
-      STRING,
-      CODE_POINT,
-      ANYCHAR,
-      CHAR_TYPE,
-      BACKREF,
-      CALL,
-      ANCHOR,
-      OP_REPEAT,
-      INTERVAL,
-      ANYCHAR_ANYTIME,  /* SQL '%' == .* */
-      ALT,
-      SUBEXP_OPEN,
-      SUBEXP_CLOSE,
-      CC_OPEN,
-      QUOTE_OPEN,
-      CHAR_PROPERTY,    /* \p{...}, \P{...} */
-      LINEBREAK,
-      /* in cc */
-      CC_CLOSE,
-      CC_RANGE,
-      POSIX_BRACKET_OPEN,
-      CC_AND,             /* && */
-      CC_CC_OPEN          /* [ */
+package org.joni.test;
+
+import org.jcodings.Encoding;
+import org.jcodings.specific.ASCIIEncoding;
+import org.joni.Option;
+import org.joni.Syntax;
+
+public class TestAU extends Test {
+    // Test configuration: ASCIIEncoding as encoding(), "utf-8" as testEncoding().
+    public int option() {
+        return Option.DEFAULT;
+    }
+
+    public Encoding encoding() {
+        return ASCIIEncoding.INSTANCE;
+    }
+
+    public String testEncoding() {
+        return "utf-8";
+    }
+
+    public Syntax syntax() {
+        return Syntax.DEFAULT;
+    }
+
+    public void test() throws InterruptedException {
+        x2s("\\X{5}", "あいab\n", 0, 5);
+    }
+
+    public static void main(String[] args) throws Throwable {
+        new TestAU().run(); // run this class's own test, not TestU8's
+    }
 }
diff --git a/test/org/joni/test/TestJoni.java b/test/org/joni/test/TestJoni.java
index d10924d..d7f4fe6 100644
--- a/test/org/joni/test/TestJoni.java
+++ b/test/org/joni/test/TestJoni.java
@@ -31,6 +31,7 @@ public class TestJoni extends TestCase {
     private Test testu8;
     private Test testInterrupt;
     private Test testPerl;
+    private Test testAsciiViaUtf;
 
     protected void setUp() {
         testa = new TestA();
@@ -41,6 +42,7 @@ public class TestJoni extends TestCase {
         testLookBehind = new TestLookBehind();
         testInterrupt = new TestInterrupt();
         testPerl = new TestPerl();
+        testAsciiViaUtf = new TestAU();
     }
 
     protected void tearDown() {
@@ -54,6 +56,7 @@ public class TestJoni extends TestCase {
 
     public void testAscii() {
         testJoniTest(testa);
+        testJoniTest(testAsciiViaUtf);
     }
 
     public void testEUCJP() {
diff --git a/test/org/joni/test/TestU8.java b/test/org/joni/test/TestU8.java
index 86c2812..9723100 100644
--- a/test/org/joni/test/TestU8.java
+++ b/test/org/joni/test/TestU8.java
@@ -84,6 +84,8 @@ public class TestU8 extends Test {
         x2s("\\R", "\u0085", 0, 2);
         x2s("\\R", "\u2028", 0, 3);
         x2s("\\R", "\u2029", 0, 3);
+
+        x2s("\\X", "\u306F\u309A\n", 0, 3);
     }
 
     public static void main(String[] args) throws Throwable {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git



More information about the pkg-java-commits mailing list