Get Google Document as HTML

后端 未结 8 1982
无人及你
无人及你 2020-11-27 17:28

I had a wild idea that I could build a website blog for an unsophisticated user friend using Google Drive Documents to back it. I was able to create a contentService that c

相关标签:
8条回答
  • 2020-11-27 18:03

    You may use the solution here

    /**
     * Converts a Drive file to HTML by downloading its 'text/html' export link.
     * The Advanced Drive service must be enabled to use this function.
     *
     * @param {string} fileId the id of the Drive file (e.g. a Google Doc)
     * @return {string} the exported HTML
     * @throws {string} if the file has no HTML export link, or if the export
     *     request does not return HTTP 200
     */
    function convertToHtml(fileId) {
      var file = Drive.Files.get(fileId);
      // Guard against files that carry no exportLinks map at all, so the
      // caller gets the descriptive message instead of a TypeError.
      var htmlExportLink = file.exportLinks && file.exportLinks['text/html'];
      if (!htmlExportLink) {
        throw 'File cannot be converted to HTML.';
      }
      var oAuthToken = ScriptApp.getOAuthToken();
      var response = UrlFetchApp.fetch(htmlExportLink, {
        headers:{
          'Authorization': 'Bearer ' + oAuthToken
        },
        // Failures are reported through the response code check below
        // rather than through an exception raised by fetch().
        muteHttpExceptions: true
      });
      // The original `!code == 200` compared a boolean to 200 and never fired.
      if (response.getResponseCode() !== 200) {
        throw 'Error converting to HTML: ' + response.getContentText();
      }
      return response.getContentText();
    }
    

    Pass the ID of the Google Doc as fileId. To enable the Advanced Drive Service, follow the instructions here.

    0 讨论(0)
  • 2020-11-27 18:04

    There is no direct method in GAS to get an HTML version of a doc and this is quite an old enhancement request but the workaround described originally by Henrique Abreu works pretty well, I use it all the time...

    The only annoying thing is the authorization process, which needs to be run from the script editor. That makes it awkward to use in a shared application (with "script unable" users), but it only happens once ;).

    There is also a Library created by Romain Vialard that makes things (a bit) easier... and adds a few other interesting functions.

    0 讨论(0)
  • 2020-11-27 18:07

    Node.js Solution

    Using the Google APIs Node.js Client

    Here's how you can get a google doc as html using google drive's node.js client library.

    // Downloads a Google Doc as HTML through the Drive v3 `files.export`
    // endpoint, using the streaming (event-emitter) interface of the
    // googleapis client, and prints the result to stdout.

    // import googleapis npm package
    var google = require('googleapis');
    
    // variables -- replace both placeholders before running
    var fileId = '<google drive doc file id>',
        accessToken = '<oauth access token>';
    
    // oauth setup
    var OAuth2 = google.auth.OAuth2,
        OAuth2Client = new OAuth2();
    
    // set oauth credentials
    OAuth2Client.setCredentials({access_token: accessToken});
    
    // google drive setup
    var drive = google.drive({version: 'v3', auth: OAuth2Client});
    
    // download file as text/html; chunks are collected and joined on 'end'
    var buffers = [];
    drive.files.export(
        {
            fileId: fileId,
            mimeType: 'text/html'
        }
    )
        .on('error', function(err) {
            // Report the failure instead of swallowing it silently; replace
            // with application-specific handling as needed.
            console.error('Failed to export Google Doc as HTML:', err);
        })
        .on('data', function(data) {
            buffers.push(data); // data is a buffer
        })
        .on('end', function() {
            var buffer = Buffer.concat(buffers),
                googleDocAsHtml = buffer.toString();
            console.log(googleDocAsHtml);
        });
    

    Take a look at the Google Drive V3 download docs for more languages and options.

    0 讨论(0)
  • 2020-11-27 18:07

    Google Docs currently has a built-in function to do this. Just download the document as "Web page (.html, zipped)" and you get a zip archive containing the HTML and any inserted images.

    I know this is not a code-based solution, but it works :)

    0 讨论(0)
  • 2020-11-27 18:12

    I've had this problem as well. The HTML that the Document HTML Export spits out is really ugly, so this was my solution:

    /**
     * Takes in a Google Doc ID, gets that doc in HTML format, cleans up the markup, and returns the resulting HTML string.
     *
     * The cleaned HTML is stored in the script cache under the doc id for 15
     * minutes. Only successful (HTTP 200) exports are cleaned and cached.
     *
     * @param {string} id the id of the google doc
     * @param {boolean} [useCaching] enable or disable reading from the cache. default true.
     * @return {string} the doc's body in html format
     * @throws {string} if id is missing, useCaching is not a boolean, the file
     *     cannot be opened, or the export request does not return HTTP 200
     */
    function getContent(id, useCaching) {
    
      if (!id) {
        throw "Please call this API with a valid Google Doc ID";
      }
    
      if (useCaching == null) {
        useCaching = true;
      }
    
      if (typeof useCaching != "boolean") {
        throw "If you're going to specify useCaching, it must be boolean.";
      }
    
      var cache = CacheService.getScriptCache();
      // only consult the cache when the caller allows cached results
      var cached = useCaching ? cache.get(id) : null;
      var html;
      if (cached) {
        html = cached;
        Logger.log("Pulling doc html from cache...");
      } else {
    
        Logger.log("Grabbing and parsing fresh html from the doc...");
    
        // Opening the file first turns a bad id into a readable error message.
        try {
          DriveApp.getFileById(id);
        } catch (err) {
          throw "Please call this API with a valid Google Doc ID. " + err.message;
        }
    
        DriveApp.getStorageUsed(); // needed to get Drive Scope requested in ScriptApp.getOAuthToken();
        var url = "https://docs.google.com/feeds/download/documents/export/Export?id=" + encodeURIComponent(id) + "&exportFormat=html";
        var param = {
          method: "get",
          headers: {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
          muteHttpExceptions:true,
        };
    
        var response = UrlFetchApp.fetch(url, param);
        var status = response.getResponseCode();
        // With muteHttpExceptions on, a failed export comes back as a normal
        // response; reject it here so an error page is never cleaned and cached.
        if (status !== 200) {
          throw "Error exporting doc as HTML (HTTP " + status + "): " + response.getContentText();
        }
        html = response.getContentText();
    
        // nuke the whole head section, including the stylesheet and meta tag
        // ([\s\S] also matches newlines, which "." does not)
        html = html.replace(/<head>[\s\S]*?<\/head>/, '');
        // remove almost all html attributes
        html = html.replace(/ (id|class|style|start|colspan|rowspan)="[^"]*"/g, '');
        // remove all of the spans, as well as the outer html and body
        html = html.replace(/<(span|\/span|body|\/body|html|\/html)>/g, '');
        // clearly the superior way of denoting line breaks
        html = html.replace(/<br>/g, '<br />');
    
        // cache doc contents for 15 minutes, in case we get a lot of requests.
        // A caching failure (e.g. value too large for the cache -- TODO confirm
        // the exact limit) must not prevent returning the freshly built HTML.
        try {
          cache.put(id, html, 900);
        } catch (cacheErr) {
          Logger.log("Could not cache doc html: " + cacheErr.message);
        }
    
      }
    
      Logger.log(html);
    
      return html;
    
    }
    

    https://gist.github.com/xd1936/cc229d14a89e6327336177bb07ac2980

    0 讨论(0)
  • 2020-11-27 18:13

    You can try this code :

      /**
       * Exports the active Google Document as HTML, logs it, and returns it.
       *
       * @return {string} the document's HTML export
       * @throws {Error} if the export request does not return HTTP 200
       */
      function getGoogleDocumentAsHTML(){
        var id = DocumentApp.getActiveDocument().getId() ;
        DriveApp.getStorageUsed(); //needed to get Drive Scope requested
        var url = "https://docs.google.com/feeds/download/documents/export/Export?id="+id+"&exportFormat=html";
        var param = {
          method      : "get",
          headers     : {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
          muteHttpExceptions:true,
        };
        var response = UrlFetchApp.fetch(url,param);
        // muteHttpExceptions suppresses fetch errors, so check the status here
        if (response.getResponseCode() !== 200) {
          throw new Error("Error exporting document as HTML (HTTP " + response.getResponseCode() + ")");
        }
        var html = response.getContentText();
        Logger.log(html);
        // return the HTML so callers can use it, not just read it in the log
        return html;
      }
    
    0 讨论(0)
提交回复
热议问题