oazabir / googledoc2html Goto Github PK
View Code? Open in Web Editor NEWExport Google Doc as clean html. Handy to make a Wordpress post from Google Doc.
Export Google Doc as clean html. Handy to make a Wordpress post from Google Doc.
Thanks to thejimbirch, the new script is here:
https://raw.githubusercontent.com/oazabir/GoogleDoc2Html/27a559acedc6384cf204b223913702cedcfda9a9/code.js
The updated instructions based on Google Docs Changes are here:
https://github.com/oazabir/GoogleDoc2Html/pull/1/files
Can you please add a license for this code? Is it open? MIT License? Thank you!
This script is working almost exactly as I need it, but for one crucial thing.
The processText
function uses getTextAttributeIndices()
to work out where bold, links, italics etc start and finish in a block of text. That works perfectly on a Text
element, but not on a TableCell
element.
I have content editors entering content into tables for specific HTML components, which I need to parse and render. But when trying to use this method the console gives me the message:
TypeError: Cannot find function getTextAttributeIndices in object TableCell.
...because there is no method getTextAttributeIndices for a TableCell. And if I use getText()
on my table cell first, I lose the formatting and the indices vanish.
I cannot find a workaround at the moment.
I just wanted to let you know that the mail I received has some missing links, and the dirty Google HTML code remains.
Tested on a complex document and it generated error alerts. Adding "if null" checks corrected it:
/**
 * Entry point: converts the active Google Doc to HTML and e-mails the result.
 *
 * Each direct child of the document body is passed to processItem(); the
 * resulting fragments are joined with a carriage return and handed to
 * emailHtml() together with the images collected along the way.
 */
function ConvertGoogleDocToCleanHtml() {
  var docBody = DocumentApp.getActiveDocument().getBody();
  var images = [];        // filled in by processItem() when it meets inline images
  var listCounters = {};  // per-list state shared across processItem() calls
  var fragments = [];

  // The child count is read once, before any child is processed.
  var total = docBody.getNumChildren();
  var index = 0;
  while (index < total) {
    fragments.push(processItem(docBody.getChild(index), listCounters, images));
    index += 1;
  }

  // Alternative sink: createDocumentForHtml(html, images) writes a new doc instead of mailing.
  emailHtml(fragments.join('\r'), images);
}
/**
 * E-mails the generated HTML to the active user.
 *
 * The HTML is used as the message body and is also attached as
 * "<document name>.html". Every collected image is sent twice: as a regular
 * attachment and as an inline image keyed by its name, so that the
 * `cid:<name>` references in the HTML body resolve.
 *
 * @param {string} html   The generated HTML.
 * @param {Array}  images Entries with `name`, `type` and `blob` properties,
 *                        as produced by processImage().
 */
function emailHtml(html, images) {
  var attachments = [];
  var inlineImages = {};

  for (var j = 0; j < images.length; j++) {
    var image = images[j];
    attachments.push({
      "fileName": image.name,
      "mimeType": image.type,
      "content": image.blob.getBytes()
    });
    // Plain string key; the previous `[[name]]` form only worked because an
    // array literal happens to stringify to its single element.
    inlineImages[image.name] = image.blob;
  }

  var name = DocumentApp.getActiveDocument().getName() + ".html";
  attachments.push({"fileName": name, "mimeType": "text/html", "content": html});

  MailApp.sendEmail({
    to: Session.getActiveUser().getEmail(),
    subject: name,
    htmlBody: html,
    inlineImages: inlineImages,
    attachments: attachments
  });
}
/**
 * Alternative output sink: writes the generated HTML into a brand-new
 * Google Doc named "<active document name>.html" and appends every
 * collected image blob after the text.
 *
 * @param {string} html   The generated HTML, stored as the body text.
 * @param {Array}  images Entries whose `blob` property is appended as an image.
 */
function createDocumentForHtml(html, images) {
  var targetName = DocumentApp.getActiveDocument().getName() + ".html";
  var target = DocumentApp.create(targetName);
  var targetBody = target.getBody();

  targetBody.setText(html);
  for (var idx = 0; idx < images.length; idx++) {
    targetBody.appendImage(images[idx].blob);
  }

  target.saveAndClose();
}
/**
 * Debug helper: writes every enumerable property of an attribute object to
 * the script log, one "name:value" line per property.
 *
 * @param {Object} atts Attribute map to dump.
 */
function dumpAttributes(atts) {
  var attName;
  var line;
  // for...in on purpose: inherited enumerable properties are logged as well.
  for (attName in atts) {
    line = attName + ":" + atts[attName];
    Logger.log(line);
  }
}
/**
 * Converts one document element, and recursively its children, to HTML.
 *
 * - PARAGRAPH: wrapped in <h1>..<h6> according to its heading, otherwise <p>.
 *   A paragraph without children yields an empty string.
 * - INLINE_IMAGE: delegated to processImage().
 * - LIST_ITEM: wrapped in <li>. The first item seen for a given
 *   (list id, nesting level) pair also opens <ul class="small"> or <ol>;
 *   the item that ends a run of list items also closes the list.
 * - TEXT: delegated to processText().
 * - Anything else exposing getNumChildren(): its children are processed
 *   recursively with no wrapping tag.
 *
 * Known limitation: the closing tag is chosen from the glyph type of the last
 * item in a run only, so nested lists and lists resumed after an interruption
 * can still yield unbalanced <ul>/<ol> tags.
 *
 * @param {Object} item         Document element to convert.
 * @param {Object} listCounters Map of "<listId>.<nestingLevel>" to the number
 *                              of items already emitted; updated in place.
 * @param {Array}  images       Collector passed on to processImage().
 * @return {string} HTML for the element.
 */
function processItem(item, listCounters, images) {
  var output = [];
  var prefix = "", suffix = "";
  var type = item.getType();

  if (type === DocumentApp.ElementType.PARAGRAPH) {
    switch (item.getHeading()) {
      case DocumentApp.ParagraphHeading.HEADING6:
        prefix = "<h6>"; suffix = "</h6>"; break;
      case DocumentApp.ParagraphHeading.HEADING5:
        prefix = "<h5>"; suffix = "</h5>"; break;
      case DocumentApp.ParagraphHeading.HEADING4:
        prefix = "<h4>"; suffix = "</h4>"; break;
      case DocumentApp.ParagraphHeading.HEADING3:
        prefix = "<h3>"; suffix = "</h3>"; break;
      case DocumentApp.ParagraphHeading.HEADING2:
        prefix = "<h2>"; suffix = "</h2>"; break;
      case DocumentApp.ParagraphHeading.HEADING1:
        prefix = "<h1>"; suffix = "</h1>"; break;
      default:
        prefix = "<p>"; suffix = "</p>";
    }

    // Skip empty paragraphs entirely instead of emitting <p></p>.
    if (item.getNumChildren() === 0) {
      return "";
    }
  }
  else if (type === DocumentApp.ElementType.INLINE_IMAGE) {
    processImage(item, images, output);
  }
  else if (type === DocumentApp.ElementType.LIST_ITEM) {
    var gt = item.getGlyphType();
    var isBullet = gt === DocumentApp.GlyphType.BULLET
      || gt === DocumentApp.GlyphType.HOLLOW_BULLET
      || gt === DocumentApp.GlyphType.SQUARE_BULLET;
    var key = item.getListId() + '.' + item.getNestingLevel();
    var counter = listCounters[key] || 0;

    // Only the first item of a list opens the container. The container is
    // NOT closed here: closing it on the first item orphaned every following
    // <li> and doubled the closing tag on one-item lists.
    if (counter === 0) {
      prefix = isBullet ? '<ul class="small"><li>' : "<ol><li>";
    }
    else {
      prefix = "<li>";
    }
    suffix = "</li>";

    // The list ends at the end of the document, or when there is no next
    // sibling at all (e.g. last item inside a container), or when the next
    // sibling is not a list item.
    var endsList = item.isAtDocumentEnd();
    if (!endsList) {
      var next = item.getNextSibling();
      endsList = next === null
        || next.getType() !== DocumentApp.ElementType.LIST_ITEM;
    }
    if (endsList) {
      suffix += isBullet ? "</ul>" : "</ol>";
    }

    listCounters[key] = counter + 1;
  }

  output.push(prefix);

  if (type === DocumentApp.ElementType.TEXT) {
    processText(item, output);
  }
  else if (item.getNumChildren) {
    // Container element: convert each child in document order.
    var numChildren = item.getNumChildren();
    for (var i = 0; i < numChildren; i++) {
      output.push(processItem(item.getChild(i), listCounters, images));
    }
  }

  output.push(suffix);
  return output.join('');
}
/**
 * Converts a Text element to HTML fragments and appends them to `output`.
 *
 * When the text has at most one formatting run it is treated as a whole:
 * bold text becomes <b>, italic text becomes <blockquote>, text that starts
 * with http:// or https:// becomes a nofollow link, anything else is emitted
 * unchanged.
 *
 * Otherwise each formatting run is emitted separately, wrapped in <i>, <b>
 * and <u> as its attributes require. Closing tags are emitted in reverse
 * order of the opening tags so that the markup nests correctly. A run of the
 * form "[...]" is emitted as <sup>; a run starting with http:// or https://
 * becomes a nofollow link.
 *
 * NOTE: the text itself is not HTML-escaped.
 *
 * @param {Object} item   Text element (getText, getTextAttributeIndices,
 *                        getAttributes(offset), isBold, isItalic are used).
 * @param {Array}  output Array of HTML fragments, appended to in place.
 */
function processText(item, output) {
  var text = item.getText();
  var indices = item.getTextAttributeIndices();
  var urlPattern = /^https?:\/\//;

  if (indices.length <= 1) {
    // Assuming that a whole para fully italic is a quote
    if (item.isBold()) {
      output.push('<b>' + text + '</b>');
    }
    else if (item.isItalic()) {
      output.push('<blockquote>' + text + '</blockquote>');
    }
    else if (urlPattern.test(text.trim())) {
      // The href uses the trimmed text so surrounding whitespace does not end up in the URL.
      output.push('<a href="' + text.trim() + '" rel="nofollow">' + text + '</a>');
    }
    else {
      output.push(text);
    }
    return;
  }

  for (var i = 0; i < indices.length; i++) {
    var startPos = indices[i];
    var endPos = i + 1 < indices.length ? indices[i + 1] : text.length;
    var partAtts = item.getAttributes(startPos);
    var partText = text.substring(startPos, endPos);

    Logger.log(partText);

    // Tags that apply to this run, in opening order.
    var tags = [];
    if (partAtts.ITALIC) {
      tags.push('i');
    }
    if (partAtts.BOLD) {
      tags.push('b');
    }
    if (partAtts.UNDERLINE) {
      tags.push('u');
    }

    for (var t = 0; t < tags.length; t++) {
      output.push('<' + tags[t] + '>');
    }

    // If someone has written [xxx] and made this whole text some special font, like superscript
    // then treat it as a reference and make it superscript.
    // Unfortunately in Google Docs, there's no way to detect superscript
    if (partText.indexOf('[') === 0 && partText[partText.length - 1] === ']') {
      output.push('<sup>' + partText + '</sup>');
    }
    else if (urlPattern.test(partText.trim())) {
      output.push('<a href="' + partText.trim() + '" rel="nofollow">' + partText + '</a>');
    }
    else {
      output.push(partText);
    }

    // Close in reverse order so the tags nest properly.
    for (var c = tags.length - 1; c >= 0; c--) {
      output.push('</' + tags[c] + '>');
    }
  }
}
/**
 * Registers an inline image and emits an <img> tag that refers to it by
 * content id.
 *
 * The image is named "Image_<n><ext>", where <n> is the number of images
 * collected so far and <ext> is derived from the blob's content type
 * (png, gif, or jpg for both jpg and jpeg). Any other content type aborts
 * with a thrown string.
 *
 * @param {Object} item   Inline image element; only getBlob() is used.
 * @param {Array}  images Collector receiving {blob, type, name}; when falsy a
 *                        throw-away local array is used instead.
 * @param {Array}  output Array of HTML fragments, appended to in place.
 */
function processImage(item, images, output)
{
  var collected = images || [];
  var blob = item.getBlob();
  var contentType = blob.getContentType();

  // One pattern covers every supported subtype; anything else is rejected.
  var subtype = /\/(png|gif|jpe?g)$/.exec(contentType);
  if (subtype === null) {
    throw "Unsupported image type: "+contentType;
  }

  var extension;
  switch (subtype[1]) {
    case "png":
      extension = ".png";
      break;
    case "gif":
      extension = ".gif";
      break;
    default:
      // "jpg" and "jpeg" both map to .jpg
      extension = ".jpg";
  }

  var name = "Image_" + collected.length + extension;
  output.push('<img src="cid:'+name+'" />');
  collected.push({
    "blob": blob,
    "type": contentType,
    "name": name
  });
}
works great
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Something interesting about the web. A new door to the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Something interesting about visualization: use data as art.
Something interesting about games: make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.