Remove html formatting when getting Body of a gmail message in javascript

雨燕双飞 提交于 2019-12-04 14:06:51
Mogsdad

Replace this:

// Write the message body (raw HTML, as returned by getBody()) into column 3
sheet.getRange(row,3).setValue(messages[m].getBody());

With this:

// Write the message body into column 3, passing the HTML from getBody()
// through getTextFromHtml() first
sheet.getRange(row,3).setValue(getTextFromHtml(messages[m].getBody()));

The getTextFromHtml() function has been adapted from this answer, with the addition of handling for some basic formatting (numbered & bullet lists, paragraph breaks).

/**
 * Converts an HTML string to text by parsing it and handing the root
 * element to getTextFromNode().
 *
 * NOTE(review): relies on the Apps Script `Xml` global; the `true` argument
 * is presumably the "lenient" flag needed for non-well-formed HTML - confirm
 * against the Xml service documentation.
 *
 * @param {string} html HTML markup to convert.
 * @return {string} Whatever getTextFromNode() produces for the root element.
 */
function getTextFromHtml(html) {
  var parsedDocument = Xml.parse(html, true);
  var rootElement = parsedDocument.getElement();
  return getTextFromNode(rootElement);
}

// List state shared across the recursion in getTextFromNode():
// 0 means "bullet items" (inside a <ul>, or outside any list); a positive
// value is the number the next <li> of the enclosing <ol> will receive.
var _itemNum = 0;

/**
 * Recursively flattens a parsed node into text, adding basic formatting
 * for line breaks, paragraphs and lists.
 *
 * The node kind is decided by the string returned from x.toString():
 *   - 'XmlText'    -> returns x.toXmlString()
 *   - 'XmlElement' -> concatenates the text of all child nodes, wrapped in
 *                     a prefix/suffix chosen by the element's local name
 *   - anything else -> ''
 *
 * Formatting rules:
 *   - <br>, <p>   : trailing newline
 *   - <ul>, <ol>  : trailing newline; items inside are bulleted / numbered
 *   - <li>        : leading newline plus ' - ' (bullet) or ' N. ' (numbered)
 *
 * @param {Object} x Node exposing toString(), and either toXmlString()
 *     (text nodes) or getName().getLocalName() and getNodes() (elements).
 * @return {string} The text content of the node and its descendants.
 */
function getTextFromNode(x) {
  switch (x.toString()) {
    case 'XmlText':
      return x.toXmlString();
    case 'XmlElement':
      var name = x.getName().getLocalName();
      var pre = '';
      var post = '';
      var isList = false;
      switch (name) {
        case 'br':
        case 'p':
          post = '\n';
          break;
        case 'ul':
        case 'ol':
          post = '\n';
          isList = true;
          break;
        case 'li':
          // Numbered only while a positive counter is active; otherwise bullet.
          pre = '\n' + (_itemNum > 0 ? (' ' + _itemNum++ + '. ') : ' - ');
          break;
        default:
          break;
      }
      // Save the enclosing list's state so a nested list (or a list that
      // follows another one) cannot corrupt the outer numbering.
      var outerItemNum = _itemNum;
      if (isList) {
        _itemNum = (name === 'ol') ? 1 : 0;
      }
      var text = x.getNodes().map(getTextFromNode).join('');
      if (isList) {
        _itemNum = outerItemNum;
      }
      return pre + text + post;
    default:
      return '';
  }
}
Fernando Rivera

From this answer: Google Apps Scripts - Extract data from gmail into a spreadsheet

You can forgo the getTextFromHtml() function altogether by simply calling getPlainBody() instead of getBody() on the message.

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!