Filereader read file using correct encoding when read as readAsArrayBuffer

谁都会走 提交于 2019-11-30 05:52:54

问题


I am working on reading .csv /xlsx file uploaded using javaScript and get the result as array containing each row . I was able to read the file and get data using FileReader and SheetJs with following code.

// code for the new excel reader
$scope.do_file =  function(files)
{
    $scope.fileContent  = [];
    var X = XLSX;
    var global_wb;
    var f = files[0];
    var reader = new FileReader();
    reader.onload = function(e)
    {
        var data = e.target.result;console.log(data);
        global_wb = X.read(data, {type: 'array'});
        var output = "";
        var result = {};
        global_wb.SheetNames.forEach(function(sheetName) {
            var roa = X.utils.sheet_to_json(global_wb.Sheets[sheetName], {header:1});
            if(roa.length) result[sheetName] = roa;
        });
        $scope.fileContent =  result["Sheet1"];
        if(!result["Sheet1"])
        {
            $scope.fileContent =  result["contacts"].filter(function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; });
        }
    };
    reader.readAsArrayBuffer(f);
};

For reading most of the files the code works , but when file containing Hebrew text with Windows-1255 encoding i get gibberish data.

Looking in for more options i tried to read the file as text using reader.readAsText and change the encoding as necessary , check the following code:

function is_Hebrew(data)
{
    var position = data.search(/[\u0590-\u05FF]/);
    return position >= 0;
}

 $scope.do_file =  function(files)
 {
    var fullResult = [];
    var file =files[0];
      var reader = new FileReader();
        reader.onload = function(e){
            var data = e.target.result;
                if(!is_Hebrew(data.toString()))
                {
                  reader.readAsText(file,'ISO-8859-8');   
                }
            };
        reader.readAsText(file);
        reader.onloadend = function(){
            var lines = reader.result.split('\r\n');
            console.log(lines);
            lines.forEach(element => {
                var cell = element.split(',');
                fullResult.push(cell);
            });

             console.log(reader);
        };
    };

but the above code is not suitable as it does not read the file as each row identifying each cell. if any one of cell contains string with coma separated value (for example if a cell contains a string value such as "25,28,29" ) the array output gives wrong data as it considers each values as each cell.

So i decided to stick with first method but i am not able to change the encoding .Is there a possible way to change encoding in the first code where i have used the readAsArrayBuffer to read the file data ?


回答1:


After going through lot of possible solutions i found that answer to the above question was to combine the above two methods. The first method for reading the xlsx files and second method for reading csv files. Also i have used an additional javaScript library called papaparse in the second method to solve the problem of reading data in each cell

$scope.is_Hebrew = function($data){
var position = $data.search(/[\u0590-\u05FF]/);
return position >= 0;
}

// code for the new excel reader
$scope.do_file =  function(files)
{
    var config = {
    delimiter: "",  // auto-detect
    newline: "",    // auto-detect
    quoteChar: '"',
    escapeChar: '"',
    header: false,
    trimHeader: false,
    dynamicTyping: false,
    preview: 0,
    encoding: "",
    worker: false,
    comments: false,
    step: undefined,
    complete: undefined,
    error: undefined,
    download: false,
    skipEmptyLines: false,
    chunk: undefined,
    fastMode: undefined,
    beforeFirstChunk: undefined,
    withCredentials: undefined
    };

    $scope.fileContent  = [];
    var f = files[0];
    var fileExtension = f.name.replace(/^.*\./, '');
    if(fileExtension == 'xlsx')
    {
        var X = XLSX;
        var global_wb;
        var reader = new FileReader();
        reader.onload = function(e)
        {
            var data = e.target.result;
            global_wb = X.read(data, {type: 'array'});
            var result = {};
            global_wb.SheetNames.forEach(function(sheetName) {
               var roa = X.utils.sheet_to_json(global_wb.Sheets[sheetName], {header:1});
               if(roa.length) result[sheetName] = roa;
            });
            $scope.fileContent =  result["Sheet1"];
            if(!result["Sheet1"])
            {
               $scope.fileContent =  result["contacts"].filter(function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; });
            }

        };
        reader.readAsArrayBuffer(f);

    }
    else if(fileExtension == 'csv')
    {
    var reader = new FileReader();
    reader.onload = function(e)
    {
        var data = e.target.result;
        console.log(f);
        console.log($scope.is_Hebrew(data.toString()));
        if(!$scope.is_Hebrew(data.toString()))
        {
           reader.readAsText(f,'ISO-8859-8');   
        }
    };

    reader.readAsText(f);
    reader.onloadend = function(e){
        var c =  Papa.parse(reader.result,[ config])
        console.log(c);
        $scope.fileContent =  c["data"].filter(function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; });

    };

    }
    else
    {
       alert("File Not supported!");
    }

$scope.fileContent.push([]);
};


来源:https://stackoverflow.com/questions/50385028/filereader-read-file-using-correct-encoding-when-read-as-readasarraybuffer

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!