[jruby-joni] 99/223: Experimental optimization by using string templates outside of bytecode stream. In most cases there will be no copies at all instead of converting strings into integer array.

Hideki Yamane henrich at moszumanska.debian.org
Mon Nov 16 11:21:55 UTC 2015


This is an automated email from the git hooks/post-receive script.

henrich pushed a commit to branch debian/sid
in repository jruby-joni.

commit 5572dd33a707da6d4e745b29d973e944d9db8eb1
Author: Marcin Mielzynski <lopx at gazeta.pl>
Date:   Sun Feb 19 23:47:37 2012 +0100

    Experimental optimization: use string templates stored outside of the
    bytecode stream.
    In most cases no copy is made at all, instead of converting strings
    into the integer bytecode array.
---
 src/org/joni/Analyser.java         |  2 ++
 src/org/joni/ArrayCompiler.java    | 37 ++++++++++++++++++++++++++------
 src/org/joni/ByteCodeMachine.java  | 32 ++++++++++++++++++++++------
 src/org/joni/ByteCodePrinter.java  | 43 ++++++++++++++++++++++++++++++++------
 src/org/joni/Config.java           |  2 ++
 src/org/joni/Regex.java            |  4 ++++
 src/org/joni/ast/StringNode.java   |  2 +-
 src/org/joni/constants/OPSize.java |  1 +
 8 files changed, 104 insertions(+), 19 deletions(-)

diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java
index 57bc6fe..b8995ce 100644
--- a/src/org/joni/Analyser.java
+++ b/src/org/joni/Analyser.java
@@ -172,7 +172,9 @@ final class Analyser extends Parser {
         if (Config.DEBUG_COMPILE) {
             if (Config.USE_NAMED_GROUP) Config.log.print(regex.nameTableToString());
             Config.log.println("stack used: " + regex.stackNeeded);
+            if (Config.USE_STRING_TEMPLATES) Config.log.print("  templates: " + regex.templateNum);
             Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
+
         } // DEBUG_COMPILE
 
         regex.state = RegexState.NORMAL;
diff --git a/src/org/joni/ArrayCompiler.java b/src/org/joni/ArrayCompiler.java
index 6193e5d..f863a3f 100644
--- a/src/org/joni/ArrayCompiler.java
+++ b/src/org/joni/ArrayCompiler.java
@@ -105,6 +105,10 @@ final class ArrayCompiler extends Compiler {
                 op == OPCode.EXACTN_IC_SB;
     }
 
+    private boolean opTemplated(int op) {
+        return op == OPCode.EXACTN || op == OPCode.EXACTMB2N;
+    }
+
     private int selectStrOpcode(int mbLength, int strLength, boolean ignoreCase) {
         int op;
 
@@ -172,13 +176,16 @@ final class ArrayCompiler extends Compiler {
 
     private int addCompileStringlength(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase) {
         int op = selectStrOpcode(mbLength, strLength, ignoreCase);
-
         int len = OPSize.OPCODE;
 
-        if (op == OPCode.EXACTMBN) len += OPSize.LENGTH;
-        if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH;
-
-        len += mbLength * strLength;
+        if (Config.USE_STRING_TEMPLATES && opTemplated(op)) {
+            // string length, template index, template string pointer
+            len += OPSize.LENGTH + OPSize.INDEX + OPSize.INDEX;
+        } else {
+            if (op == OPCode.EXACTMBN) len += OPSize.LENGTH;
+            if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH;
+            len += mbLength * strLength;
+        }
         return len;
     }
 
@@ -196,7 +203,14 @@ final class ArrayCompiler extends Compiler {
                 addLength(strLength);
             }
         }
-        addBytes(bytes, p, mbLength * strLength);
+
+        if (Config.USE_STRING_TEMPLATES && opTemplated(op)) {
+            addInt(regex.templateNum);
+            addInt(p);
+            addTemplate(bytes);
+        } else {
+            addBytes(bytes, p, mbLength * strLength);
+        }
     }
 
     private int compileLengthStringNode(Node node) {
@@ -1234,4 +1248,15 @@ final class ArrayCompiler extends Compiler {
         addOpcode(opcode);
         addOption(option);
     }
+
+    private void addTemplate(byte[]bytes) {
+        if (regex.templateNum == 0) {
+            regex.templates = new byte[2][];
+        } else if (regex.templateNum == regex.templates.length) {
+            byte[][]tmp = new byte[regex.templateNum * 2][];
+            System.arraycopy(regex.templates, 0, tmp, 0, regex.templateNum);
+            regex.templates = tmp;
+        }
+        regex.templates[regex.templateNum++] = bytes;
+    }
 }
diff --git a/src/org/joni/ByteCodeMachine.java b/src/org/joni/ByteCodeMachine.java
index bb2b18d..f36a2ce 100644
--- a/src/org/joni/ByteCodeMachine.java
+++ b/src/org/joni/ByteCodeMachine.java
@@ -473,7 +473,15 @@ class ByteCodeMachine extends StackMachine {
         int tlen = code[ip++];
         if (s + tlen > range) {opFail(); return;}
 
-        while (tlen-- > 0) if (code[ip++] != bytes[s++]) {opFail(); return;}
+        if (Config.USE_STRING_TEMPLATES) {
+            byte[]bs = regex.templates[code[ip++]];
+            int ps = code[ip++];
+
+            while (tlen-- > 0) if (bs[ps++] != bytes[s++]) {opFail(); return;}
+
+        } else {
+            while (tlen-- > 0) if (code[ip++] != bytes[s++]) {opFail(); return;}
+        }
         sprev = s - 1;
     }
 
@@ -520,11 +528,23 @@ class ByteCodeMachine extends StackMachine {
         int tlen = code[ip++];
         if (tlen * 2 > range) {opFail(); return;}
 
-        while(tlen-- > 0) {
-            if (code[ip] != bytes[s]) {opFail(); return;}
-            ip++; s++;
-            if (code[ip] != bytes[s]) {opFail(); return;}
-            ip++; s++;
+        if (Config.USE_STRING_TEMPLATES) {
+            byte[]bs = regex.templates[code[ip++]];
+            int ps = code[ip++];
+
+            while(tlen-- > 0) {
+                if (bs[ps] != bytes[s]) {opFail(); return;}
+                ps++; s++;
+                if (bs[ps] != bytes[s]) {opFail(); return;}
+                ps++; s++;
+            }
+        } else {
+            while(tlen-- > 0) {
+                if (code[ip] != bytes[s]) {opFail(); return;}
+                ip++; s++;
+                if (code[ip] != bytes[s]) {opFail(); return;}
+                ip++; s++;
+            }
         }
         sprev = s - 2;
     }
diff --git a/src/org/joni/ByteCodePrinter.java b/src/org/joni/ByteCodePrinter.java
index 0a23211..4cddca3 100644
--- a/src/org/joni/ByteCodePrinter.java
+++ b/src/org/joni/ByteCodePrinter.java
@@ -27,8 +27,9 @@ import org.joni.constants.OPSize;
 import org.joni.exception.InternalException;
 
 class ByteCodePrinter {
-    int[]code;
-    int codeLength;
+    final int[]code;
+    final int codeLength;
+    final byte[][] templates;
 
     Object[]operands;
     int operantCount;
@@ -40,6 +41,8 @@ class ByteCodePrinter {
         codeLength = regex.codeLength;
         operands = regex.operands;
         operantCount = regex.operandLength;
+
+        templates = regex.templates;
         enc = regex.enc;
         warnings = regex.warnings;
     }
@@ -53,16 +56,28 @@ class ByteCodePrinter {
         while (len-- > 0) sb.append(new String(new byte[]{(byte)code[s++]}));
     }
 
+    private void pStringFromTemplate(StringBuilder sb, int len, byte[]tm, int idx) {
+        sb.append(":T:");
+        while (len-- > 0) sb.append(new String(new byte[]{tm[idx++]}));
+    }
+
     private void pLenString(StringBuilder sb, int len, int mbLen, int s) {
         int x = len * mbLen;
         sb.append(":" + len + ":");
         while (x-- > 0) sb.append(new String(new byte[]{(byte)code[s++]}));
     }
 
+    private void pLenStringFromTemplate(StringBuilder sb, int len, int mbLen, byte[]tm, int idx) {
+        int x = len * mbLen;
+        sb.append(":T:" + len + ":");
+        while (x-- > 0) sb.append(new String(new byte[]{(byte)tm[idx++]}));
+    }
+
     public int compiledByteCodeToString(StringBuilder sb, int bp) {
         int len, n, mem, addr, scn, cod;
         BitSet bs;
         CClassNode cc;
+        int tm, idx;
 
         sb.append("[" + OPCode.OpCodeNames[code[bp]]);
         int argType = OPCode.OpCodeArgTypes[code[bp]];
@@ -136,8 +151,16 @@ class ByteCodePrinter {
             case OPCode.EXACTN:
                 len = code[bp];
                 bp += OPSize.LENGTH;
-                pLenString(sb, len, 1, bp);
-                bp += len;
+                if (Config.USE_STRING_TEMPLATES) {
+                    tm = code[bp];
+                    bp += OPSize.INDEX;
+                    idx = code[bp];
+                    bp += OPSize.INDEX;
+                    pLenStringFromTemplate(sb, len, 1, templates[tm], idx);
+                } else {
+                    pLenString(sb, len, 1, bp);
+                    bp += len;
+                }
                 break;
 
             case OPCode.EXACTMB2N1:
@@ -158,8 +181,16 @@ class ByteCodePrinter {
             case OPCode.EXACTMB2N:
                 len = code[bp];
                 bp += OPSize.LENGTH;
-                pLenString(sb, len, 2, bp);
-                bp += len * 2;
+                if (Config.USE_STRING_TEMPLATES) {
+                    tm = code[bp];
+                    bp += OPSize.INDEX;
+                    idx = code[bp];
+                    bp += OPSize.INDEX;
+                    pLenStringFromTemplate(sb, len, 2, templates[tm], idx);
+                } else {
+                    pLenString(sb, len, 2, bp);
+                    bp += len * 2;
+                }
                 break;
 
             case OPCode.EXACTMB3N:
diff --git a/src/org/joni/Config.java b/src/org/joni/Config.java
index 5b9fdbc..dab3565 100644
--- a/src/org/joni/Config.java
+++ b/src/org/joni/Config.java
@@ -64,6 +64,8 @@ public interface Config extends org.jcodings.Config {
 
     final boolean DONT_OPTIMIZE                     = false;
 
+    final boolean USE_STRING_TEMPLATES              = true; // use embeded string templates in Regex object as byte arrays instead of compiling them into int bytecode array
+
 
     final int MAX_CAPTURE_HISTORY_GROUP             = 31;
 
diff --git a/src/org/joni/Regex.java b/src/org/joni/Regex.java
index 744d9d3..661ce2d 100644
--- a/src/org/joni/Regex.java
+++ b/src/org/joni/Regex.java
@@ -90,6 +90,9 @@ public final class Regex implements RegexState {
     int dMin;                               /* min-distance of exact or map */
     int dMax;                               /* max-distance of exact or map */
 
+    byte[][]templates;
+    int templateNum;
+
     public Regex(CharSequence cs) {
         this(cs.toString());
     }
@@ -427,6 +430,7 @@ public final class Regex implements RegexState {
                 s += "]\n";
             }
         }
+
         return s;
     }
 
diff --git a/src/org/joni/ast/StringNode.java b/src/org/joni/ast/StringNode.java
index fb83567..2857c94 100644
--- a/src/org/joni/ast/StringNode.java
+++ b/src/org/joni/ast/StringNode.java
@@ -99,7 +99,7 @@ public final class StringNode extends Node implements StringType {
             if ((bytes[i] & 0xff) >= 0x20 && (bytes[i] & 0xff) < 0x7f) {
                 value.append((char)bytes[i]);
             } else {
-                value.append(String.format("0x%02x", bytes[i]));
+                value.append(String.format("[0x%02x]", bytes[i]));
             }
         }
         value.append("'");
diff --git a/src/org/joni/constants/OPSize.java b/src/org/joni/constants/OPSize.java
index 05efbed..d5595ad 100644
--- a/src/org/joni/constants/OPSize.java
+++ b/src/org/joni/constants/OPSize.java
@@ -32,6 +32,7 @@ public interface OPSize {
     final int OPTION                = 1;
     final int CODE_POINT            = 1;
     final int POINTER               = 1;
+    final int INDEX                 = 1;
 
     /* op-code + arg size */
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git



More information about the pkg-java-commits mailing list