[Pkg-javascript-commits] [pdf.js] 191/414: Replace `getAll` with `getKeys` in `PartialEvaluator_hasBlendModes` to speed up loading of badly generated PDF files (issue 6961)

David Prévot taffit at moszumanska.debian.org
Tue Jun 28 17:12:20 UTC 2016


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository pdf.js.

commit 07e1ad40a2c8cb4d6e46053fe2714d57f2fbca01
Author: Jonas Jenwald <jonas.jenwald at gmail.com>
Date:   Tue Feb 9 17:09:17 2016 +0100

    Replace `getAll` with `getKeys` in `PartialEvaluator_hasBlendModes` to speed up loading of badly generated PDF files (issue 6961)
    
    Some bad PDF generators, in particular "Scribus PDF", duplicate resources *a lot* at various levels of the PDF files. This can lead to `PartialEvaluator_hasBlendModes` taking an unreasonable amount of time to complete.
    The reason is that the current code is using `Dict_getAll`, which recursively dereferences *all* indirect objects, which can be really slow. This patch instead uses `Dict_getKeys`, and then manually looks up only the necessary indirect objects.
    
    I've added the PDF file as a `load` test. The most important thing here is probably to ensure that the file remains available in the repo, and the comment should help reduce the chance of regressions. (Note that locally, the `load` test times out without this patch, but we cannot really assume that that always happens.)
    
    Fixes 6961.
---
 src/core/evaluator.js   |  31 ++++++++++++++++++++++---------
 test/pdfs/.gitignore    |   1 +
 test/pdfs/issue6961.pdf | Bin 0 -> 2156587 bytes
 test/test_manifest.json |   8 ++++++++
 4 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index 044cc7c..0a3a5df 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -151,17 +151,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
         processed[resources.objId] = true;
       }
 
-      var nodes = [resources];
+      var nodes = [resources], xref = this.xref;
       while (nodes.length) {
-        var key;
+        var key, i, ii;
         var node = nodes.shift();
         // First check the current resources for blend modes.
         var graphicStates = node.get('ExtGState');
         if (isDict(graphicStates)) {
-          graphicStates = graphicStates.getAll();
-          for (key in graphicStates) {
-            var graphicState = graphicStates[key];
-            var bm = graphicState['BM'];
+          var graphicStatesKeys = graphicStates.getKeys();
+          for (i = 0, ii = graphicStatesKeys.length; i < ii; i++) {
+            key = graphicStatesKeys[i];
+
+            var graphicState = graphicStates.get(key);
+            var bm = graphicState.get('BM');
             if (isName(bm) && bm.name !== 'Normal') {
               return true;
             }
@@ -172,9 +174,20 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
         if (!isDict(xObjects)) {
           continue;
         }
-        xObjects = xObjects.getAll();
-        for (key in xObjects) {
-          var xObject = xObjects[key];
+        var xObjectsKeys = xObjects.getKeys();
+        for (i = 0, ii = xObjectsKeys.length; i < ii; i++) {
+          key = xObjectsKeys[i];
+
+          var xObject = xObjects.getRaw(key);
+          if (isRef(xObject)) {
+            if (processed[xObject.toString()]) {
+              // The XObject has already been processed, and by avoiding a
+              // redundant `xref.fetch` we can *significantly* reduce the load
+              // time for badly generated PDF files (fixes issue6961.pdf).
+              continue;
+            }
+            xObject = xref.fetch(xObject);
+          }
           if (!isStream(xObject)) {
             continue;
           }
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index 5be1fc3..911781e 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -18,6 +18,7 @@
 !issue5972.pdf
 !issue5874.pdf
 !issue6782.pdf
+!issue6961.pdf
 !filled-background.pdf
 !ArabicCIDTrueType.pdf
 !ThuluthFeatures.pdf
diff --git a/test/pdfs/issue6961.pdf b/test/pdfs/issue6961.pdf
new file mode 100644
index 0000000..b52d548
Binary files /dev/null and b/test/pdfs/issue6961.pdf differ
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 249b018..e796620 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -2760,5 +2760,13 @@
        "md5": "8961cb55149495989a80bf0487e0f076",
        "rounds": 1,
        "type": "load"
+    },
+    {  "id": "issue6961",
+       "file": "pdfs/issue6961.pdf",
+       "md5": "a80e4357a8fda758d96c2c76f2980b03",
+       "link": false,
+       "rounds": 1,
+       "lastPage": 1,
+       "type": "load"
     }
 ]

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/pdf.js.git



More information about the Pkg-javascript-commits mailing list