问题
I have a google spreadsheet that is populated by a form, so timestamps are automatically added in the first column for each row. I have a script that removes duplicate rows in my spreadsheet (5 specific columns must be the same for it to be a duplicate, while some other columns are ignored), but I want to modify it so that if I have multiple rows for the same person's data but with different timestamps, the script will keep the most recent row. How would I do this? Thanks!
/**
 * Removes duplicate rows from the student sheet.
 *
 * Two rows count as duplicates when first name, last name, grade, dad's first
 * name and mom's first name (row indexes 1, 2, 5, 9 and 10) all match. Every
 * other column, including the timestamp at index 0, is ignored. The first
 * occurrence found in `studentdata` is the one that is kept.
 *
 * Reads the script-level `studentdata` array, rewrites `StudentSheet` starting
 * at cell A1 with the surviving rows, then calls `sortSheet()`.
 */
function removeDuplicates() {
  // first name, last name, grade, dad's first name, and mom's first name
  var keyColumns = [1, 2, 5, 9, 10];

  // Loose equality is kept from the original comparison, so a value held as a
  // number in one row and as text in another (5 vs "5") still matches.
  function isSameStudent(a, b) {
    for (var k = 0; k < keyColumns.length; k++) {
      if (a[keyColumns[k]] != b[keyColumns[k]]) {
        return false;
      }
    }
    return true;
  }

  var newData = [];
  for (var i = 0; i < studentdata.length; i++) {
    var row = studentdata[i];
    var duplicate = false;
    for (var j = 0; j < newData.length; j++) {
      if (isSameStudent(row, newData[j])) {
        duplicate = true;
        break; // one match is enough; no need to scan the rest
      }
    }
    if (!duplicate) {
      newData.push(row);
    }
  }

  StudentSheet.clearContents();
  // With no rows there is no newData[0] to measure and nothing to write back.
  if (newData.length > 0) {
    StudentSheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
  }
  sortSheet(); // re-sort the sheet after rewriting it
}
回答1:
Here's a different approach: concatenate all the relevant columns into a single string and use it as an object key for faster searching. If you have a big sheet, this can help:
/**
 * Deletes duplicate rows from the sheet, keeping the most recent row of each
 * group.
 *
 * Rows are duplicates when the values at indexes 1, 2, 5, 9 and 10 are the
 * same. Within a group the row with the greatest value at `dateColumn` is
 * kept; on a tie the row seen first is kept.
 *
 * Reads the script-level `studentdata` array and `dateColumn` index, and calls
 * `StudentSheet.deleteRow()` once for every row that loses.
 */
function deleteDuplicateRowsSaveRecent() {
  var usedColumns = [1, 2, 5, 9, 10];
  // NOTE(review): assumes studentdata[0] is sheet row 1 (the script writes the
  // data back starting at row 1) - adjust if the data is read from below a header.
  var firstSheetRow = 1;

  var newestByKey = {}; // duplicate key -> index in studentdata of the row kept so far
  var staleIndexes = []; // indexes in studentdata of the rows to delete

  for (var i = 0; i < studentdata.length; i++) {
    var row = studentdata[i];
    var parts = [];
    for (var k = 0; k < usedColumns.length; k++) {
      parts.push(String(row[usedColumns[k]]));
    }
    // Serialising the parts as a JSON array keeps the column boundaries, so
    // "ab" + "c" can no longer collide with "a" + "bc".
    var key = JSON.stringify(parts);

    if (Object.prototype.hasOwnProperty.call(newestByKey, key)) {
      var keptIndex = newestByKey[key];
      if (row[dateColumn] > studentdata[keptIndex][dateColumn]) {
        staleIndexes.push(keptIndex);
        newestByKey[key] = i;
      } else {
        staleIndexes.push(i);
      }
    } else {
      newestByKey[key] = i;
    }
  }

  // Delete from the bottom up: removing a row shifts every row below it, so
  // going top-down would make the remaining row numbers point at the wrong rows.
  staleIndexes.sort(function (a, b) {
    return b - a;
  });
  for (var d = 0; d < staleIndexes.length; d++) {
    StudentSheet.deleteRow(staleIndexes[d] + firstSheetRow);
  }
}
Not tested but hopefully you'll get the logic.
回答2:
This sorts the data so that rows are grouped by their 'test for duplicates' string, and then by date descending within each group.
It starts at the bottom, making the bottom row the current row. The current row's 'test for duplicates' string is compared against that of the row above.
If the current row is a duplicate of the one above, the current row is deleted, leaving the row above, which has the later date.
If it is not a duplicate, the row above becomes the current row and is tested against the one above that, deleting the current row if it is a duplicate and moving on if not.
When complete, it replaces the existing data in the spreadsheet with the modified data, properly sorted.
/**
 * Removes duplicate form responses from the "Form Responses 1" sheet, keeping
 * the most recent response of each duplicate group.
 *
 * Rows are grouped by a case-insensitive, trimmed key built from the columns
 * listed below, ordered newest-first inside each group, and every row that
 * has the same key as the row above it is dropped. The surviving rows are
 * written back under the header row, sorted ascending by key.
 *
 * Assumes column A holds the form timestamp as a Date object.
 */
function myFunction() {
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var s = ss.getSheetByName("Form Responses 1");

  var lastRow = s.getLastRow();
  // Only the header row (or nothing at all): a zero-row range would throw.
  if (lastRow < 2) {
    return;
  }

  // dataRange should not include headers
  var dataRange = s.getRange(2, 1, lastRow - 1, s.getLastColumn());
  var data = dataRange.getValues();

  // Test for duplicate columns.
  // numbers below = column number; A=1 B=2 etc.
  // NOTE(review): these are the columns the original snippet compared (B, C,
  // E, I, J) - confirm they match the layout of the sheet being cleaned.
  var lName = 2;
  var fName = 3;
  var grade = 5;
  var dad = 9;
  var mom = 10;
  var keyColumns = [lName, fName, grade, dad, mom];

  // Pair every row with its duplicate key and timestamp instead of pushing
  // temporary cells into the row itself.
  var decorated = [];
  for (var i = 0; i < data.length; i++) {
    var parts = [];
    for (var k = 0; k < keyColumns.length; k++) {
      // String() lets numeric or empty cells through where .toLowerCase() would throw.
      parts.push(String(data[i][keyColumns[k] - 1]).toLowerCase().trim());
    }
    decorated.push({
      // A JSON array keeps the column boundaries, so "ab"+"c" differs from "a"+"bc".
      key: JSON.stringify(parts),
      time: data[i][0].getTime(),
      row: data[i]
    });
  }

  // Group by key (descending), latest date at the top of each duplicate group.
  decorated.sort(function (a, b) {
    if (a.key !== b.key) {
      return a.key < b.key ? 1 : -1;
    }
    return b.time - a.time;
  });

  // test each row with one above and delete if duplicate.
  for (var j = decorated.length - 1; j > 0; j--) {
    if (decorated[j].key === decorated[j - 1].key) {
      decorated.splice(j, 1);
    }
  }

  // Current sort is descending; walk it backwards to write ascending order.
  var result = [];
  for (var n = decorated.length - 1; n >= 0; n--) {
    result.push(decorated[n].row);
  }

  // Clear exactly the rows that were read, then write the survivors back.
  dataRange.clearContent();
  s.getRange(2, 1, result.length, result[0].length).setValues(result);
}
回答3:
After working up my previous answer, which I believe to be the better, I considered another approach that would cause less disruption to your existing code.
You push the first non-duplicate from studentdata to the new array, so if studentdata is sorted by timestamp descending before the test, the first non-duplicate that is encountered and pushed will be the latest.
Placing the following at the very beginning of your function should achieve this:
// Order studentdata newest-first by the timestamp in column 0, so that the
// first row kept for each person by the duplicate check is the most recent one.
// Comparing the Date values directly avoids adding and then removing a
// temporary sort cell on every row.
studentdata.sort(function (a, b) {
  return b[0].getTime() - a[0].getTime();
});
回答4:
I decided to sort the date of submission column so that the most recent date was on top, and then run my original duplicate removal script. It seemed to work.
/**
 * Sorts the "Students" sheet with three successive single-column sorts:
 * column A (date of submission) descending, then column C (last name)
 * ascending, then column F (grade/role) ascending.
 *
 * The passes run in that order, so the last one (column F) decides the final
 * primary ordering. NOTE(review): the earlier passes only survive as
 * tie-breakers if the sheet sort is stable - confirm.
 */
function sortSheet() {
  var studentsSheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Students");
  // [column number, ascending?] for each pass, in the order they are applied
  var passes = [
    [1, false], // column A, date of submission, most recent on top
    [3, true], // column C, last name, A-Z
    [6, true] // column F, grade/role, A-Z
  ];
  for (var p = 0; p < passes.length; p++) {
    studentsSheet.sort(passes[p][0], passes[p][1]);
  }
}
/**
 * Removes duplicate rows from the student sheet, keeping the first occurrence
 * of each student found in `studentdata`.
 *
 * Two rows count as duplicates when first name, last name, grade, dad's first
 * name and mom's first name (row indexes 1, 2, 5, 9 and 10) all match. The
 * date of submission at index 0 is not compared. Because the first occurrence
 * wins, order `studentdata` newest-first beforehand if the most recent
 * submission is the one that should survive.
 *
 * Reads the script-level `studentdata` array and rewrites `StudentSheet`
 * starting at cell A1 with the surviving rows.
 */
function removeDuplicates() {
  // first name, last name, grade, dad's first name, and mom's first name
  var matchColumns = [1, 2, 5, 9, 10];

  // Loose equality is kept from the original comparison, so a value held as a
  // number in one row and as text in another (5 vs "5") still matches.
  function rowsMatch(left, right) {
    for (var c = 0; c < matchColumns.length; c++) {
      if (left[matchColumns[c]] != right[matchColumns[c]]) {
        return false;
      }
    }
    return true;
  }

  var newData = [];
  for (var i = 0; i < studentdata.length; i++) {
    var row = studentdata[i];
    var duplicate = false;
    for (var j = 0; j < newData.length; j++) {
      if (rowsMatch(row, newData[j])) {
        duplicate = true;
        break; // one match is enough; no need to scan the rest
      }
    }
    if (!duplicate) {
      newData.push(row);
    }
  }

  StudentSheet.clearContents();
  // With no rows there is no newData[0] to measure and nothing to write back.
  if (newData.length > 0) {
    StudentSheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
  }
}
来源:https://stackoverflow.com/questions/29201584/google-script-to-remove-duplicate-rows-in-spreadsheet-and-keep-the-most-recent-e