Google Apps Script; Docs; convert selected element to HTML

六月ゝ 毕业季﹏ 提交于 2019-12-01 11:11:24

There is a script GoogleDoc2HTML by Omar AL Zabir. Its purpose is to convert the entire document into HTML. Since you only want to convert rich text within the selected element, the function relevant to your task is processText from the script, shown below.

The method getTextAttributeIndices returns the starting offset of each run of uniformly formatted text, i.e. every position where a text attribute changes, such as from normal to bold or back. If it returns at most one index, the same attributes apply to the entire element (typically a paragraph), and this case is handled by the first branch of the if statement.

The second part deals with the general case, looping over the indices and inserting HTML markup corresponding to the attributes.

The script isn't maintained, so consider it as a starting point for your own code, rather than a ready-to-use library. There are some unmerged PRs that improve the conversion process, in particular for inline links.

/**
 * Converts the rich text of a Docs text element into HTML fragments and
 * appends them to `output`.
 *
 * When the element has at most one attribute run, it is treated as a whole:
 * fully bold text becomes <b>, fully italic text is assumed to be a quote and
 * becomes <blockquote>, and text starting with a URL becomes a link.
 * Otherwise each attribute run is wrapped in <i>/<b>/<u> as applicable.
 *
 * All document text is HTML-escaped before it is placed in the markup.
 *
 * @param {Object} item Element exposing getText(), getTextAttributeIndices(),
 *     getAttributes(offset), isBold() and isItalic().
 * @param {Array} output Array that receives the HTML fragments, in order.
 */
function processText(item, output) {
  var text = item.getText();
  var indices = item.getTextAttributeIndices();

  // Escapes characters that are significant in HTML text and in quoted
  // attribute values, so document content cannot break the markup.
  function escapeHtml(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  // True when the text, ignoring surrounding whitespace, starts with an
  // http:// or https:// scheme.
  function startsWithUrl(value) {
    return /^https?:\/\//.test(value.trim());
  }

  // Builds an anchor whose href is the trimmed text and whose label is the
  // text as written.
  function buildLink(value) {
    return '<a href="' + escapeHtml(value.trim()) + '" rel="nofollow">' +
        escapeHtml(value) + '</a>';
  }

  if (indices.length <= 1) {
    // Assuming that a whole para fully italic is a quote
    if (item.isBold()) {
      output.push('<b>' + escapeHtml(text) + '</b>');
    }
    else if (item.isItalic()) {
      output.push('<blockquote>' + escapeHtml(text) + '</blockquote>');
    }
    else if (startsWithUrl(text)) {
      output.push(buildLink(text));
    }
    else {
      output.push(escapeHtml(text));
    }
    return;
  }

  for (var i = 0; i < indices.length; i++) {
    var startPos = indices[i];
    // A run extends to the start of the next run, or to the end of the text.
    var endPos = i + 1 < indices.length ? indices[i + 1] : text.length;
    var partAtts = item.getAttributes(startPos);
    var partText = text.substring(startPos, endPos);

    if (partAtts.ITALIC) {
      output.push('<i>');
    }
    if (partAtts.BOLD) {
      output.push('<b>');
    }
    if (partAtts.UNDERLINE) {
      output.push('<u>');
    }

    // If someone has written [xxx] as its own formatting run (for example
    // in a special font), treat it as a reference and make it superscript.
    // The original author could not detect superscript directly, hence
    // this bracket heuristic.
    if (partText.indexOf('[') == 0 && partText[partText.length - 1] == ']') {
      output.push('<sup>' + escapeHtml(partText) + '</sup>');
    }
    else if (startsWithUrl(partText)) {
      output.push(buildLink(partText));
    }
    else {
      output.push(escapeHtml(partText));
    }

    // Close in the reverse order of opening so the tags nest correctly.
    if (partAtts.UNDERLINE) {
      output.push('</u>');
    }
    if (partAtts.BOLD) {
      output.push('</b>');
    }
    if (partAtts.ITALIC) {
      output.push('</i>');
    }
  }
}

I ended up writing my own script to support my use case of bold, links, and italics:

/**
 * Returns an HTML string for the rich text of a Docs element, preserving
 * bold, italic and link formatting.
 *
 * The text is split at every offset reported by getTextAttributeIndices();
 * each run is HTML-escaped and wrapped in <b>, <i> and <a> as applicable.
 *
 * @param {Object} element Element exposing editAsText(), whose result
 *     provides getText(), getTextAttributeIndices(), isBold(offset),
 *     isItalic(offset) and getLinkUrl(offset).
 * @return {string} The HTML markup for the element's text.
 */
function getHtmlOfElement(element) {
  var text = element.editAsText();
  var string = text.getText();
  var indices = text.getTextAttributeIndices();
  var output = [];

  // Escapes characters that are significant in HTML text and in quoted
  // attribute values, so document content cannot break the markup.
  function escapeHtml(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  for (var i = 0; i < indices.length; i++) {
    var offset = indices[i];
    // A run extends to the start of the next run, or to the end of the text.
    var endPos = i + 1 < indices.length ? indices[i + 1] : string.length;
    var partText = string.substring(offset, endPos);

    var isBold = text.isBold(offset);
    var isItalic = text.isItalic(offset);
    var linkUrl = text.getLinkUrl(offset);

    if (isBold) {
      output.push('<b>');
    }
    if (isItalic) {
      output.push('<i>');
    }
    if (linkUrl) {
      output.push('<a href="' + escapeHtml(linkUrl) + '">');
    }

    output.push(escapeHtml(partText));

    // Close in the reverse order of opening so the tags nest correctly.
    if (linkUrl) {
      output.push('</a>');
    }
    if (isItalic) {
      output.push('</i>');
    }
    if (isBold) {
      output.push('</b>');
    }
  }

  return output.join("");
}

You can simply call it using something like:

getHtmlOfElement(myTableCell); // returns something like "<b>Bold</b> test."
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!