Coder Social home page Coder Social logo

Comments (10)

rkusa avatar rkusa commented on August 24, 2024

Hi, thanks for reporting your issue!

Regarding toStream(): I would also pretty much love such a functionality. However, it would not work. For example, when having the total page count on each page, you have to update each preceding page when rendering a new one. This is just one example that prevents streaming functionality :-(

I'll try to trigger the same error first so that I may not need your source data. Are you using version 1.0.0-alpha.5?

from pdfjs.

rkusa avatar rkusa commented on August 24, 2024

Are you saving the pdf with encoding set to binary? e.g.

// Example: persist the rendered PDF string using the 'binary' encoding.
fs.writeFile(fileName, pdfString, 'binary', function (err) {
  // Surface write failures instead of reporting success unconditionally.
  if (err) throw err;
  console.log('saved');
});

Edit: I think I am not able to reproduce your issue. Are you getting the location in your PDF where the issue occurs? If so, you could also give me some lines surrounding this location straight out of the .pdf file.

Thanks

from pdfjs.

keithrz avatar keithrz commented on August 24, 2024

Yes, I'm using that version, and yes, I've been saving the file as binary.

I think if you create enough data you'll be able to recreate. Thanks for
the info about streaming!

On Saturday, October 10, 2015, Markus Ast [email protected] wrote:

Hi, thanks for reporting your issue!

Regarding toStream(): I would also pretty much love such a functionality.
However, it would not work. For example, when having the total page count
on each page, you have to update each preceding page when rendering a new
one. This is just one example that prevents streaming functionality :-(

I'll try to trigger the same error first so that I may not need your
source data. Are you using version 1.0.0-alpha.5?


Reply to this email directly or view it on GitHub
#46 (comment).

from pdfjs.

rkusa avatar rkusa commented on August 24, 2024

Are you just adding text, or also images?

from pdfjs.

keithrz avatar keithrz commented on August 24, 2024

Just text.

Here is the code that I use to create each document:

var underscore = require('underscore');
var fs = require('fs');
var tmp = require('tmp');
var path = require('path');
var pdfjs = require('pdfjs');
var formatNumber = require('format-number');
var moment = require('moment-timezone');


/**
 * Renders a multi-page pdf report from tabular data and stores it in a temp file.
 *
 * Each entry of pages becomes one page consisting of a title table, a data table
 * (header rows first, then the rows of every dataset) and a closing line carrying
 * the generation timestamp. The document header shows metadata.docTitle; the footer
 * shows the page number and the page count.
 *
 * @param metadata (object) : formatting options; reads doc, docTitle, pageTitle, table,
 *                            headerRows, headerRowsData, rowKeys, mainRows, mainRowsFirstCell,
 *                            summaryRows, summaryRowsFirstCell and numberFormat
 * @param pages (Array) : one item per page: {pageTitle: string, data: Array}; every data item
 *                        has a rows array and an optional summary flag
 * @param next (Function) : callback with params (err, filepath) : filepath is the path of the generated pdf.
 */
function pdfFromDataAsFile(metadata, pages, next) {
    // fall back to the regular font when the doc options do not name one
    var docOptions = underscore.defaults(convertFontName(metadata.doc), {font: regularFont});
    var doc = new pdfjs.Document(docOptions);

    // header and footer are shared by every page
    doc.header().text(metadata.docTitle, {font: boldFont, fontSize: 12});

    var footerText = doc.footer().text({textAlign: 'center'});
    footerText.append('Page  ').pageNumber().append('  of  ').pageCount();

    pages.forEach(function(page, pageIndex) {
        // every page after the first starts on a fresh sheet
        if(pageIndex !== 0) {
            doc.pageBreak();
        }

        // page title, rendered as a single full-width cell
        var titleOptions = underscore.extend({widths: ['100%']}, convertFontName(metadata.pageTitle));
        doc.table(titleOptions).tr().td(page.pageTitle);

        var dataTable = doc.table(convertFontName(metadata.table));

        // header row(s)
        metadata.headerRowsData.forEach(function(headerData) {
            var headerRow = dataTable.tr(convertFontName(metadata.headerRows));

            metadata.rowKeys.forEach(function(rowKey) {
                headerRow.td(headerData[rowKey]);
            });
        });

        // data row(s): summary datasets use their own row and first-cell options
        page.data.forEach(function(dataset) {
            var isSummary = dataset.summary;
            var rowOptions = isSummary ? metadata.summaryRows : metadata.mainRows;
            var firstCellOptions = isSummary ? metadata.summaryRowsFirstCell : metadata.mainRowsFirstCell;

            dataset.rows.forEach(function(rowData) {
                var dataRow = dataTable.tr(convertFontName(rowOptions));

                metadata.rowKeys.forEach(function(rowKey, columnIndex) {
                    if(columnIndex !== 0) {
                        // remaining columns are numbers formatted per key
                        dataRow.td(formatNumber(metadata.numberFormat[rowKey])(rowData[rowKey]));
                        return;
                    }

                    // first column is passed through unformatted with its own cell options
                    dataRow.td(rowData[rowKey], convertFontName(firstCellOptions));
                });
            });
        });

        // closing line of the page: when the report was generated
        var generatedLine = doc.table({widths: ["100%"]}).tr().td().text();
        generatedLine.br();
        var generatedAt = moment
            .tz(moment(), conf.reportSchedule.defaultTimezone)
            .format('M/D/YYYY h:mm:ss A z');
        generatedLine.append("Report Generated by Reporting System at " + generatedAt);
    });

    var pdf = doc.render();

    // ask tmp for a unique file name inside ../temp (relative to this module)
    var tempFileTemplate = path.resolve(__dirname + '/../temp') + '/XXXXXX.pdf';
    tmp.tmpName({template: tempFileTemplate}, function(err, tempFilePath) {
        if(err)
            return next(err);

        savePdf(pdf, tempFilePath, next);
    });
}

exports.pdfFromDataAsFile = pdfFromDataAsFile;

/**
 * Writes a rendered pdf to disk using the 'binary' encoding.
 * @param pdf (object) : rendered pdf; the result of its toString() is written as the file content
 * @param filepath (string) : destination path of the pdf file
 * @param next (Function) : callback with params (err, filepath) : filepath is echoed back on success
 */
function savePdf(pdf, filepath, next) {
    // fs.writeFile takes the destination path first and the data second; without
    // the path the pdf content would be used as the file name.
    fs.writeFile(filepath, pdf.toString(), 'binary', function(err) {
        if(err)
            return next(err);

        next(null, filepath);
    });
}

And here is some sample code that populates my doc metadata:

/**
 * Builds the formatting metadata for the sales comparison report.
 *
 * The table has a leading 'name' column followed by three columns per metric
 * (actual, last year, change to last year). Header labels, column keys, column
 * widths and number formats are all derived from the metric tables below.
 *
 * @param docTitle (string) : stored as metadata.docTitle
 * @param headerTitle (string) : label of the 'name' column in the second header row
 * @return (object) : metadata with the keys doc, docTitle, numberFormat, pageTitle, table,
 *                    rowKeys, headerRows, headerRowsData, mainRows, mainRowsFirstCell,
 *                    summaryRows and summaryRowsFirstCell
 */
function getMetadata(docTitle, headerTitle) {
    // key suffixes of the three columns every metric contributes
    var columnSuffixes = ['', '_last_year', '_ly_delta'];

    // left-to-right order of the metrics in the rendered table
    var columnOrder = [
        'net_sales', 'visitors', 'conversion', 'transactions', 'units',
        'average_unit_retail', 'units_per_transaction', 'avg_dollar_per_transaction'
    ];

    // labels and number formats per metric, in the key order of the header rows
    // and of the number-format map
    var metrics = [
        {key: 'net_sales', title: 'Net Sales', deltaTitle: '% Chg to LY',
            format: {prefix: '$', round: 0}, deltaFormat: {suffix: '%', round: 2}},
        {key: 'units', title: 'Units', deltaTitle: '% Chg to LY',
            format: {}, deltaFormat: {suffix: '%', round: 2}},
        {key: 'visitors', title: 'Visitors', deltaTitle: '% Chg to LY',
            format: {}, deltaFormat: {suffix: '%', round: 2}},
        {key: 'conversion', title: 'Conversion Rate', deltaTitle: 'Chg to LY',
            format: {suffix: '%', round: 2}, deltaFormat: {round: 2}},
        {key: 'transactions', title: 'Transactions', deltaTitle: '% Chg to LY',
            format: {}, deltaFormat: {suffix: '%', round: 2}},
        {key: 'average_unit_retail', title: 'AUR', deltaTitle: 'Chg to LY',
            format: {prefix: '$', round: 2}, deltaFormat: {prefix: '$', round: 2}},
        {key: 'units_per_transaction', title: 'UPT', deltaTitle: 'Chg to LY',
            format: {round: 2}, deltaFormat: {round: 2}},
        {key: 'avg_dollar_per_transaction', title: 'ADT', deltaTitle: 'Chg to LY',
            format: {prefix: '$', round: 2}, deltaFormat: {prefix: '$', round: 2}}
    ];

    // every number-format entry gets its own object, never a shared reference
    function shallowCopy(source) {
        var copy = {};
        Object.keys(source).forEach(function(property) {
            copy[property] = source[property];
        });
        return copy;
    }

    // the wide 'name' column followed by 24 equally sized metric columns
    var rowKeys = ['name'];
    var widths = ['11.1%'];
    columnOrder.forEach(function(metricKey) {
        columnSuffixes.forEach(function(suffix) {
            rowKeys.push(metricKey + suffix);
            widths.push('3.7%');
        });
    });

    // first header row repeats the metric title, second one names the three variants
    var titleRow = {name: ''};
    var variantRow = {name: headerTitle};
    var numberFormat = {};
    metrics.forEach(function(metric) {
        var variantLabels = ['Actual', 'Last Year', metric.deltaTitle];
        var variantFormats = [metric.format, metric.format, metric.deltaFormat];

        columnSuffixes.forEach(function(suffix, position) {
            var columnKey = metric.key + suffix;
            titleRow[columnKey] = metric.title;
            variantRow[columnKey] = variantLabels[position];
            numberFormat[columnKey] = shallowCopy(variantFormats[position]);
        });
    });

    return {
        doc: {
            width: 1008, // (14  in * 72 dpi)
            height: 612  // (8.5 in * 72 dpi)
        },
        docTitle: docTitle,
        numberFormat: numberFormat,
        pageTitle: {fontName: 'bold', fontSize: 5, paddingTop: 10, paddingBottom: 4},
        table: {
            headerRows: 0,
            fontSize: 5,
            borderHorizontalWidth: 0.5,
            borderVerticalWidth: 0.5,
            widths: widths,
            padding: 1
        },
        rowKeys: rowKeys,
        headerRows: {textAlign: 'center', fontName: 'bold', backgroundColor: 'd2d2d2'},
        headerRowsData: [titleRow, variantRow],
        mainRows: {textAlign: 'right'},
        mainRowsFirstCell: {textAlign: 'left', fontName: 'bold'},
        summaryRows: {textAlign: 'right', fontName: 'bold', backgroundColor: 'd2d2d2'},
        summaryRowsFirstCell: {textAlign: 'left'}
    };
}

I don't have any sample data for pages, but they would have the format:

[{pageTitle: string, data: object}]

where the data object would have the same format as headerRowsData in the metadata object. The values within the data object would be numbers.

from pdfjs.

keithrz avatar keithrz commented on August 24, 2024

I've created a full test gist here:
https://gist.github.com/keithrz/d8c9b6c2821bd66c36e5

from pdfjs.

rkusa avatar rkusa commented on August 24, 2024

Should be fixed in master. It was an issue with rounding numbers in exponential notation.
I've also cut some bytes off the resulting file size.

There are some more possible ways to reduce file size, e.g.: draw borders for each row and not for each single cell and compress PDF content streams. I've added these features to my todo list, but they do not have a high priority.

For performance and memory usage: I'll definitely add a streaming API. But unfortunately, not before December.

Thank you very much for providing the example code!

from pdfjs.

keithrz avatar keithrz commented on August 24, 2024

Awesome improvement!

Besides fixing the invalid character issue, the 28-page report shrank almost by half - from 6 MB to 3.3 MB.

The streaming API would be amazing, and yes, definitely something that will take some time.

Unless you want this issue kept open to keep track of the streaming API, I will close it.

from pdfjs.

rkusa avatar rkusa commented on August 24, 2024

I am glad it worked out!

Streaming API will be tracked in #47

from pdfjs.

rkusa avatar rkusa commented on August 24, 2024

Version 2.0.0-alpha.1 is now completely streaming based.

from pdfjs.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, making everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.