Google Apps Script - Customizable Fast XML Parser | AdsUpNow
 

Google Apps Script – Customizable Fast XML Parser

June 1, 2017

There are a few Google Apps scripts out there already for fetching an XML document and pasting it into a Google sheet, but all of the ones I could find were slower than they should have been, and were not easy modified to only pull certain subsets of the feed (i.e. columns). I needed a customizable solution in a hurry, so I whipped up this Google Apps Script code (Javascript that runs on Google’s servers) to fetch an XML feed via UrlFetchApp, parse it, and then paste the results into a specific Google Sheet. It was developed in a hurry, so it is not the cleanest or best code, but it works, and works fast (about 10 seconds for a feed with about 1800 items).

Because this Google script is designed to run outside of an actual spreadsheet, you can use this code in a single apps script file to process hundreds of XML feeds and send to separate sheets, or even separate Google Sheet documents. This makes it a great fit for processing inventory feeds for multiple websites and/or clients.

You can customize both namespace and the fields that should get pulled (e.g. “id”, “title”, “price”, etc.).

I am publishing this code under Creative Commons Attribution 3.0 – Basically, do whatever you want with it, but if you feel like being nice, put a link back to this page 🙂

 

/*
++++++++++ ------ Copyrigh Joshua Tz ------ +++++++++
+++++ ----- AdsUpNow http://adsupnow.com ----- ++++++
+++++++++++++++++++++++++++++++++++++++++++++++++++++
++ Major Reference: https://developers.google.com/apps-script/reference/xml-service/
*/

// Execution time: ~10 seconds for a feed with 1796 items, of which most of the time was waiting for the remote URL to respond with the feed during UrlFetchApp.fetch(XMLFeedURL)

// FeedURL: url to XML document (RSS feed, feed exports, etc.)
// sheetsFileDestinationURL: link to google sheets document (e.g. https://docs.google.com/spreadsheets/d/________/edit)
// rawPasteSheetName: the name of the sheet that the content should be pasted into, EXACTLY
// OPT_childNamesArray: Optional - children of each item in the array that you want to extract - these become columns in the export. You can omit if you just want a dump of the entire XML feed
// OPT_Namespace: Optional depending on your XML structure. If a namespace is declared, you should use XmlService.getNamespace and pass the result as the argument (see example below for Google Shopping Feed)
 
/* Example:
// Scenario: Feed is export from a shopify plugin, formatted for Google Shopping.

// Public URL of XML document
var XMLFeedURL = "https://www.totallyfakeshopifywebsiteblahblah.com/a/feed/facebook.rss";

// Optional - specify which columns to get pulled in
var columns = ["id","title","description","link","image_link","brand","condition","availability","price","mpn","shipping_weight","item_group_id","product_type"];

// namespace for Google Merchant attributes - necessary
var namespaceObject = XmlService.getNamespace("g", "http://base.google.com/ns/1.0"); 

// Put it all together
processXML(XMLFeedURL,"https://docs.google.com/spreadsheets/d/1f4mLMdyRTfeFePsTnMQG-HbF423Ef2sdfasdf/edit","Raw RSS",columns,namespaceObject);
*/

function processXML(FeedURL,sheetsFileDestinationURL,rawPasteSheetName,OPT_childNamesArray,OPT_Namespace){
  var GoogleSheetsFile = SpreadsheetApp.openByUrl(sheetsFileDestinationURL);
  var GoogleSheetsPastePage = GoogleSheetsFile.getSheetByName(rawPasteSheetName);
  if (OPT_childNamesArray){
    GoogleSheetsPastePage.getDataRange().offset(1,0).clearContent(); // get all filled cells, omitting the header row, and clear content
  }
  else {
    GoogleSheetsPastePage.getDataRange().offset(0,0).clearContent(); // get all filled cells, INCLUDING the header row, and clear content
  }
  
  // Generate 2d/md array / rows export based on requested columns and feed
  var exportRows = []; // hold all the rows that are generated to be pasted into the sheet
  var XMLFeedURL = FeedURL;
  var feedContent = UrlFetchApp.fetch(XMLFeedURL).getContentText(); // get the full feed content
  var feedItems = XmlService.parse(feedContent).getRootElement().getChild('channel').getChildren('item'); // get all items in the feed
  for (var x=0; x<feedItems.length; x++){
    // Iterate through items in the XML/RSS feed
    var currentFeedItem = feedItems[x];
    var singleItemArray = []; // use to hold all the values for this single item/row
    
    // Parse for specific children (requires names and namespace)
    if (OPT_childNamesArray){
      for (var y=0; y<OPT_childNamesArray.length; y++){
        // Iterate through requested children by name and fill rows
        var currentChildName = OPT_childNamesArray[y];
        if (OPT_Namespace){
          
          if (currentFeedItem.getChild(OPT_childNamesArray[y],OPT_Namespace)){
            singleItemArray.push(currentFeedItem.getChildText(OPT_childNamesArray[y],OPT_Namespace));
          }
          else {
            singleItemArray.push("null");
          }
        }
        else {
          if (currentFeedItem.getChild(OPT_childNamesArray[y])){
            singleItemArray.push(currentFeedItem.getChildText(OPT_childNamesArray[y]));
          }
          else {
            singleItemArray.push("null");
          }
        }
      }
      exportRows.push(singleItemArray);
    }
    
    // Parse for ALL children, does not require knowing names or namespace
    else if (!OPT_childNamesArray){
      var allChildren = currentFeedItem.getChildren();
      
      if (x == 0){
        // if looking at first item, create a header row first with column headings
        var headerRow = [];
        for (var h=0; h<allChildren.length; h++){
          headerRow.push(allChildren[h].getName());
        }
        exportRows.push(headerRow);
      }
      
      for (var c=0; c<allChildren.length; c++){
        singleItemArray.push(allChildren[c].getText());
      }
      
      exportRows.push(singleItemArray);
    }
  }
  
  // Paste the generated md array export into the spreadsheet
  if (OPT_childNamesArray){
    GoogleSheetsPastePage.getRange(2,1,exportRows.length,exportRows[1].length).setValues(exportRows);
  }
  else if (!OPT_childNamesArray){
    var maxRangeLength = 0;
    var currentRowIndex = 1;
    for (var x = 0; x<exportRows.length; x++){
      if (exportRows[x].length > maxRangeLength){
        maxRangeLength = exportRows[x].length;
      }
      GoogleSheetsPastePage.getRange(currentRowIndex,1,1,exportRows[x].length).setValues([exportRows[x]]);
      currentRowIndex++;
    }
  }
}

Leave a Reply

Your email address will not be published. Required fields are marked *

More Services