Troubles with pdf.js promises

和自甴很熟 提交于 2019-12-21 20:57:26

问题


I'm trying to implement a pdf word count in Javascript. I came across pdf.js which uses promises. Is there a way to wait till the script is done before returning the count? I know that this goes against the idea of promises, but the other js pdf readers out there either sometimes produce a bunch of gibberish or return nothing. In its current form the function always return a word count of 0.

function countWords(pdfUrl){
var pdf = PDFJS.getDocument(pdfUrl);
var count = 0;
pdf.then(function(pdf) {
     var maxPages = pdf.pdfInfo.numPages;
     for (var j = 1; j <= maxPages; j++) {
        var page = pdf.getPage(j);

        var txt = "";
        page.then(function(page) {
            var textContent = page.getTextContent();
            textContent.then(function(page){

            for(var i=0;i<page.items.length;i++){
                txtadd = page.items[i].str
                txt += txtadd.replace(/[^a-zA-Z0-9:;,.?!-() ]/g,'');
            }
                count = count + txt.split(" ").length;

            })
        })
     }
     return count;
});

}


回答1:


Promises cannot be handled in sync manner. The countWords cannot return value immediately and has to wait on inner promises (one for document and multiple for pages and text contexts) to be resolved. So countWords must return a Promise or accept callback. Best way is try to return and chain then() calls. When needed to join resolution use Promise.all:

function countWords(pdfUrl){
var pdf = PDFJS.getDocument(pdfUrl);
return pdf.then(function(pdf) { // calculate total count for document
     var maxPages = pdf.pdfInfo.numPages;
     var countPromises = []; // collecting all page promises
     for (var j = 1; j <= maxPages; j++) {
        var page = pdf.getPage(j);

        var txt = "";
        countPromises.push(page.then(function(page) { // add page promise
            var textContent = page.getTextContent();
            return textContent.then(function(page){ // return content promise

            for(var i=0;i<page.items.length;i++){
                txtadd = page.items[i].str
                txt += txtadd.replace(/[^a-zA-Z0-9:;,.?!-() ]/g,'');
            }
                return txt.split(" ").length; // value for page words

            });
        }));
     }
     // Wait for all pages and sum counts
     return Promise.all(countPromises).then(function (counts) {
       var count = 0;
       counts.forEach(function (c) { count += c; });
       return count;
     });
});
}
// waiting on countWords to finish completion, or error
countWords("https://cdn.mozilla.net/pdfjs/tracemonkey.pdf").then(function (count) {
  alert(count);
}, function (reason) {
  console.error(reason);
});
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>


来源:https://stackoverflow.com/questions/40482569/troubles-with-pdf-js-promises

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!