#!/usr/bin/perl

###############################################################################
#   pgexport:  ASCII export utility for PostgreSQL                            #
#   version 0.9.0 2004-01-26                                                  #
#                                                                             #
#   Copyright 2004, Wayne Matthew Syvinski                                    #
#                                                                             #
#   If you use this software, you agree to the following:                     #
#                                                                             #
#   (1) You agree to hold harmless and waive any claims against the author.   #
#   (2) You agree that there is no warranty, express or implied, for this     #
#       software whatsoever.                                                  # 
#   (3) You will abide by the GNU General Public License or the Artistic      #
#       License in the use of this software.                                  #
#   (4) You agree not to modify this notice.                                  #
#                                                                             #
#   If you use this software and find it useful, I would appreciate an        #
#   emailed note to matthew@techcelsior.com                                   #
#                                                                             #
###############################################################################

use strict;
use warnings;

use DBI;
use Text::ParseWords;
# Optional dependency: try to load Term::ReadKey while the script is still
# being compiled, so that ReadMode() and ReadLine() are already imported when
# the password prompt further down is compiled.  A failed load is trapped by
# the eval and simply leaves those two functions undefined.
BEGIN
{
	eval
	{
		require Term::ReadKey;
		Term::ReadKey->import();
		1;
	};
};
#Term::ReadKey is not part of the standard Perl distribution as of 5.8.1
#Perl installations that do not have it can still run pgexport
#but passwords will appear in cleartext on the terminal

# File-scoped working variables.  The script is one flat sequence of top-level
# statements, so everything shared between its steps is declared here.

# settings taken from the command line or from the configuration file
my @config;
my $configitem;

# field names: the ones used for the header record and the ones the database
# reports for the table or view
my $fieldnamelist;      # declared but not referenced elsewhere in this script
my @fieldnames;
my $fieldname;
my @sourcefieldnames;

# declared but not referenced elsewhere in this script
my $createtable;
my $insertquery;
my $insertstub;

# scratch variables
my $tempitem;
my $tempitem2;          # declared but not referenced elsewhere in this script
my @temparray;
my $i;

# localtime() fields captured at start-up, used for timestamps
my @timearray;

# DBI database handle and statement handle
my $dbh;
my $sth;

# per-record export state
my $dataline;
my $datum;
my @dataarray;
my $insertlist;         # declared but not referenced elsewhere in this script

# fixed-width layout: pack template, raw specfile lines, field widths
my $packspec;
my @speclines;
my @widths;

# true when no-echo password entry through Term::ReadKey can be used
my $termkey;

# declared but not referenced elsewhere in this script
my $incomingformat;
my $nativeformat;

# number of records written to the export file
my $recordcount;

# Start-up banner: one boxed row per line on STDOUT, followed by two blank
# lines.
my @bannerrows =
(
	'+==========================================+',
	'|pgexport:  PostgreSQL ASCII export utility|',
	'|   Copyright 2003 Wayne Matthew Syvinski  |',
	'|                                          |',
	'|Use allowed under the Artistic License or |',
	'|the GNU General Public License, version 2 |',
	'|                or later.                 |',
	'|                                          |',
	'|Complete terms and documentation contained|',
	'|      within source code (POD format)     |',
	'+==========================================+',
);

print STDOUT map { $_."\n" } @bannerrows;
print STDOUT "\n\n";

# running total of exported records
$recordcount = 0;

# Decide whether no-echo password entry is available.
#
# The test is made on the imported functions themselves rather than on $@:
# $@ is not reliable this far from the compile-time eval, because perl resets
# it when a BEGIN block returns normally, so a failed load of Term::ReadKey
# would go unnoticed and the cleartext fallback would never be chosen.
$termkey = (defined(&ReadMode) and defined(&ReadLine)) ? 1 : 0;

# Load the settings either from a configuration file (first argument "c",
# second argument the file name, one setting per line) or directly from the
# command line.
if (@ARGV and $ARGV[0] eq 'c')
{
	unless (defined $ARGV[1] and length $ARGV[1])
	{
		die qq|Configuration file mode requires a file name - pgexport exiting\n\n|;
	}

	# three-argument open with a lexical handle: the file name is never
	# interpreted as an open mode, and the real reason for a failure is shown
	open(my $configfh, '<', $ARGV[1])
		or die qq|File $ARGV[1] could not be opened ($!) - pgexport exiting\n\n|;
	@config = <$configfh>;
	chomp(@config);
	close $configfh;
}
else
{
	@config = @ARGV;
}

# start time, reused later for the generated file name and the closing message
@timearray = localtime(time());

# Normalise the settings before anything is validated, so that every later
# test sees the same thing whether a value was omitted or given as NULL.
# The list is padded to the fifteen documented entries; a missing entry or
# the literal word NULL (any case, optional surrounding blanks) becomes the
# empty string.  The match is anchored so that values which merely contain
# the letters "null" (a table name, a password) are left alone.
$#config = 14 if $#config < 14;

foreach $configitem(@config)
{
	if (!defined($configitem) or $configitem =~ m/\A\s*NULL\s*\z/i)
	{
		$configitem = q||;
	}
}

#default PostgreSQL port
unless ($config[2])
{
	$config[2] = '5432';
}

#a delimiter given as t or \t means a literal tab
if (($config[8] eq 't') or ($config[8] eq '\t'))
{
    $config[8] = "\t";
}

#never overwrite an existing export file
if (($config[7]) and (-e $config[7]))
{
	print STDOUT "File $config[7] already exists.  Exiting...\n";
	exit(0);
}

unless ($config[3])
{
	print STDOUT "A database name is required.  Exiting...\n";
	exit(0);
}

#the delimiter lives in $config[8]; $config[0] is the mode
if (($config[0] =~ m/d/i) and (!($config[8])))
{
	print STDOUT "Delimited files require declaration of a delimiter.  Exiting...\n";
	exit(0);
}

#prompt for the user name when none was supplied
if (!$config[4])
{
	print STDOUT "\nEnter your database username: ";
	$config[4] = <STDIN>;
	chomp($config[4]);
}

#prompt for the password when none was supplied; the entry is hidden only
#when $termkey says Term::ReadKey can be used, otherwise it is echoed
if (!$config[5])
{
	print STDOUT "\nEnter your database password: ";

	if (!$termkey)
	{
		$config[5] = <STDIN>;
	}
	else
	{
		ReadMode('noecho');
		$config[5] = ReadLine(0);
		ReadMode('normal');
	}

	chomp($config[5]);
}

#attend to the listing of field names for export or validation
#
#A query that can return no rows (WHERE FALSE) is run against the table or
#view purely to learn its column names from the statement handle.

#RaiseError turns every DBI failure (connect, prepare, execute) into a fatal
#error carrying the driver's own message, instead of a later "can't call
#method on an undefined value"
$dbh = DBI->connect("dbi:Pg:dbname=$config[3];host=$config[1];port=$config[2]",$config[4],$config[5],{ RaiseError => 1, PrintError => 0 })
	or die "Could not connect to database $config[3]: $DBI::errstr\n";

#NOTE(review): the table name is interpolated into the SQL exactly as given;
#it is assumed to come from a trusted command line or configuration file
$sth = $dbh->prepare("SELECT * FROM $config[6] WHERE FALSE");

$sth->execute();

#NAME is a reference to the list of column names, in column order
@sourcefieldnames = @{ $sth->{NAME} };

$sth->finish();
$dbh->disconnect();

if ($config[0] =~ m/d/i) #the export of fieldnames is appropriate only for delimited files
{
	if ($config[10]) #if a file containing field names is provided in @ARGV
	{
		open(my $specfh, '<', $config[10])
			or die "Specfile $config[10] could not be opened ($!) - pgexport exiting\n\n";
		chomp(@fieldnames = <$specfh>);
		close $specfh;

		#blank lines carry no field (the fixed-width branch skips them too)
		@fieldnames = grep { m/\S/ } @fieldnames;

		#keep only the text before the first comma, in case the file is
		#structured as FIELDNAME,LENGTH
		foreach my $specentry (@fieldnames)
		{
			$specentry =~ s/,.*\z//s;
		}
	}
	elsif ($config[11])  #if a fieldname list is provided in @ARGV
	{
		@fieldnames = quotewords(",",0,$config[11]);
	}
	else #use the field names given in the database table
	{
		@fieldnames = @sourcefieldnames;
	}

	#forcecase may be missing altogether; treat that as "preserve"
	my $forcecase = defined($config[12]) ? $config[12] : '';

	if ($forcecase =~ m/u/i) #option to force uppercase
	{
		@fieldnames = map { uc($_) } @fieldnames;
	}
	elsif ($forcecase =~ m/l/i) #option to force lowercase
	{
		@fieldnames = map { lc($_) } @fieldnames;
	}
}
elsif ($config[0] =~ m/f/i) #if using fixed-width data, a pack mask is required
{
	if ($config[8]) #a mask given directly wins over a specfile
	{
		$packspec = $config[8];
	}
	elsif ($config[10]) #otherwise build the mask from the widths in the specfile
	{
		open(my $specfh, '<', $config[10])
			or die "Specfile $config[10] could not be opened ($!) - pgexport exiting\n\n";
		@speclines = <$specfh>;
		close $specfh;
		chomp(@speclines);
		@widths = ();

		foreach my $specline (@speclines)
		{
			next unless $specline;

			#each line is FIELDNAME,WIDTH; the width is mandatory here
			my @specparts = quotewords(",",0,$specline);

			unless (defined($specparts[1]) and length($specparts[1]))
			{
				die "Specfile $config[10] has no field width in line: $specline - pgexport exiting\n\n";
			}

			push @widths,$specparts[1];
		}

		#one A<width> template item per field
		$packspec = 'A'.join('A',@widths);
	}
}


#check to make sure the number of fields in a filespec or fieldlist equals the number of fields to be exported

if ($config[0] =~ m/d/i)
{
	if (scalar(@fieldnames) != scalar(@sourcefieldnames))
	{
		print STDOUT "The number of fields in your table or view does not match the number of fields in your ".($config[10]?'specfile':'fieldlist').". Exiting...\n";
		exit(0);
	}
}
elsif ($config[0] =~ m/f/i)
{
	#without a mask and without a specfile there is nothing to count
	unless (defined($packspec) and length($packspec))
	{
		print STDOUT "Fixed-width files require a pack mask or a specfile.  Exiting...\n";
		exit(0);
	}

	#strip repeat counts and blanks; one character is left per template item
	(my $templateitems = $packspec) =~ s/[0-9 ]//g;

	if (length($templateitems) != scalar(@sourcefieldnames))
	{
		print STDOUT "The number of fields in your table or view does not match the number of fields in your ".($config[10]?'specfile':'mask').". Exiting...\n";
		exit(0);
	}
}

unless ($config[7]) #an export filename is required - if one wasn't provided, one must be generated
{
	#The generated name goes into the datafile slot ($config[7]); the table
	#name in $config[6] must stay intact because the export query still needs it.
	#Form: ./<tablename>_export_YYYYMMDD_HHMMSS, using the start time in @timearray.
	$config[7] = sprintf('./%s_export_%04d%02d%02d_%02d%02d%02d',
		$config[6],
		$timearray[5]+1900, $timearray[4]+1, $timearray[3],
		$timearray[2], $timearray[1], $timearray[0]);
}

###

$recordcount = 0;

#settings used for every record; a missing entry is treated as empty
my $datafile  = defined($config[7])  ? $config[7]  : '';
my $delimiter = defined($config[8])  ? $config[8]  : '';
my $forcecase = defined($config[12]) ? $config[12] : '';
my $quotemark = defined($config[13]) ? $config[13] : '';

#text placed between two neighbouring fields: closing quote, delimiter, opening quote
my $separator = $quotemark.$delimiter.$quotemark;

#three-argument open with a lexical handle; stop at once if the file cannot be created
open(my $datafh, '>', $datafile)
	or die "Export file '$datafile' could not be created ($!) - pgexport exiting\n\n";

#if the first text record should contain field names, export it now

if (($config[9]) and ($config[0] =~ m/d/i)) #must request field name export, and is only appropriate for delimited data
{
	my $header = $quotemark.join($separator,@fieldnames).$quotemark;

	if ($forcecase =~ m/u/i)
	{
		$header = uc($header);
	}
	elsif ($forcecase =~ m/l/i)
	{
		$header = lc($header);
	}

	print {$datafh} $header."\n"
		or die "Could not write to export file '$datafile' ($!) - pgexport exiting\n\n";
}

#now access the table or view and get the data out of the database

#RaiseError makes connect, prepare, execute and fetch failures fatal with the
#driver's own message
$dbh = DBI->connect("dbi:Pg:dbname=$config[3];host=$config[1];port=$config[2]",$config[4],$config[5],{ RaiseError => 1, PrintError => 0 })
	or die "Could not connect to database $config[3]: $DBI::errstr\n";

$sth = $dbh->prepare("SELECT * FROM $config[6]");

$sth->execute();

while (@dataarray = $sth->fetchrow_array)
{
	foreach my $value (@dataarray)
	{
		#SQL NULL arrives as undef; export it as an empty field
		$value = '' unless defined($value);

		#NOTE(review): the POD calls preserving apostrophes "the default",
		#but an empty/NULL/0 setting strips them here - confirm which is intended
		$value =~ s/\'//g unless $config[14];
	}

	if ($config[0] =~ m/d/i)
	{
		$dataline = $quotemark.join($separator,@dataarray).$quotemark;
	}
	elsif ($config[0] =~ m/f/i)
	{
		$dataline = pack($packspec,@dataarray);
	}

	if ($forcecase =~ m/u/i)
	{
		$dataline = uc($dataline);
	}
	elsif ($forcecase =~ m/l/i)
	{
		$dataline = lc($dataline);
	}

	print {$datafh} $dataline."\n"
		or die "Could not write to export file '$datafile' ($!) - pgexport exiting\n\n";
	$recordcount++;
}


$sth->finish();

$dbh->disconnect;

#buffered write errors only surface when the file is closed
close($datafh)
	or die "Could not finish writing export file '$datafile' ($!) - pgexport exiting\n\n";

print STDOUT "\n\nFINISHED exporting $recordcount records to file $datafile\nfrom table $config[6] in database $config[3]\n";
print STDOUT "pgexport ".($timearray[5]+1900).'-'.substr('0'.($timearray[4]+1),-2).'-'.substr('0'.$timearray[3],-2).'  '.substr('0'.$timearray[2],-2).':'.substr('0'.$timearray[1],-2).':'.substr('0'.$timearray[0],-2)."\n\n";

exit(0);

=head1 NAME

pgexport - a Perl utility to download ASCII files from PostgreSQL databases

=head1 SYNOPSIS

B<command-line method:>

B<pgexport> I<mode> I<dbhost> I<dbport> I<dbname> I<dbuser> I<dbpass> I<tablename> I<datafile> I<mask_or_delimiter> I<firstrecord> I<specfile> I<fieldlist> I<forcecase> I<quotemark> I<apostrophes> 

or

B<configuration file method:>

B<pgexport> B<c> I<configfile>

B<All arguments are mandatory>, but some may be marked as NULL.

=head1 DESCRIPTION

B<pgexport> allows for the export of delimited or fixed-width ASCII text 
from PostgreSQL databases.

I wrote this because (1) COPY is limited to the superuser (2) \copy is a crippled version of COPY (3) pgadmin3 and pgaccess have broken import/export functionality (at least on Fedora Core 1 - yes, with the updates installed via yum) (4) I am trying to extricate myself from certain proprietary software products from Redmond, Washington.  A replacement for a certain desktop database is the last piece of the puzzle - and I am only too happy to share with others.

So, in the great tradition of Open-Source, I rolled my own.

Yes, there is a B<pgimport>.

=head1 LICENSE

This software (pgexport) may be used under either the GNU General Public License, version 2 (or at your option, any later version), or the Artistic License.

No warranty or guarantee, either express or implied, exists for this software or for the use of this software.  You use this software at your own risk.

=head1 DEVELOPMENT

B<pgexport> was developed using Fedora Core 1 (updated via yum to 2004-01-26), PostgreSQL 7.3.4, and Perl 5.8.1 i386-linux-thread-multi.

=head1 OPTIONS

B<command-line method>

NOTE:  When NULL is an option, the literal string "NULL" is meant, without any quotation marks.  A "0" (zero) can also be substituted where NULL is used (again, no quotation marks).

=over 4

=item I<mode>

I<mode> is either B<d> if using delimited data, or B<f> if using fixed-width data

=item I<dbhost>

I<dbhost> is the DNS host name or IP address of the PostgreSQL server.  If B<NULL>, then B<localhost> is the default.

=item I<dbport>

I<dbport> is the connection port for the PostgreSQL server.  If B<NULL>, then B<5432> is the default.

=item I<dbname>

I<dbname> is the name of the database on the server.  No default value is available, so you MUST provide this information.

=item I<dbuser>

I<dbuser> is the username by which you intend to connect to the database server.  No default, but if NULL is passed, you will be prompted for a username from the command line.

=item I<dbpass>

I<dbpass> is the password by which you intend to connect to the database server.  No default, but if NULL is passed, you will be prompted for a password from the command line.  If Perl module Term::ReadKey is available, the password will be entered no-echo.  If the module is not available, the password will be entered in cleartext.

=item I<tablename>

I<tablename> is the name of the table or view in the database from which you want to export your data.  Note that the table or view must already exist.  No default value is possible.

=item I<datafile>

I<datafile> is the name of the file to which you want to export the data.  An absolute path is not required, but is best.  If NULL is passed, the default is a timestamped filename of the form I<tablename>B<_export_YYYYMMDD_HHMMSS>, which will be saved in the current working directory.  If you do not have write permissions to the directory, you will not get an export file!

=item I<mask_or_delimiter>

I<mask_or_delimiter> works differently depending on whether I<mode> is B<d> or B<f>.  

If I<mode> is B<d>, then the field delimiter is entered here (use \t for tabs).  There is NO default delimiter.

If I<mode> is B<f>, then a pack mask may be given here, or NULL passed.  (See Perl documentation for functions pack() and unpack()).  Pack masks should use only the B<A> template character for ASCII data (i.e. do not use a template character other than B<A> unless you know what you are doing).

=item I<firstrecord>

If I<mode> is B<f>, this must be B<NULL>.

If I<mode> is B<d>, pass a B<1> here to indicate that the datafile should be exported with field names in its first record.  If it should not, pass B<NULL>.

=item I<specfile>

I<specfile> indicates the location of a layout and specification file.  The file should contain records in the form I<fieldname>,I<fieldwidth> (yes, comma-separated), with each record entry terminated by a newline (so it should "read down"), although I<fieldwidth> is optional if I<mode> is B<d>.

If no layout and specification file is used, pass B<NULL>.

=item I<fieldlist>

I<fieldlist> contains a comma-delimited list of field names.  B<NULL> may be passed if you are providing field names another way.

=item I<forcecase>

I<forcecase> can have one of three values:  B<u>, B<l>, B<p>.  

B<p> preserves the case of all values and fieldnames.  This is the default

B<u> forces all values and fieldnames to uppercase.

B<l> forces all values and fieldnames to lowercase.

=item I<quotemark>

I<quotemark> contains the quoting character you want to use.  All data is exported as text.  If you do not want a quoting character, pass NULL.

I<quotemark> should be NULL if I<mode> is B<f>.

If you are using B<pgexport> in command-line mode and you want to use the single-quote or double-quote character, you must escape the character with a backslash.  However, if you are using configuration file mode, do NOT escape the character with a backslash.

=item I<apostrophes>

I<apostrophes> controls the export of single-quote and apostrophe characters.

If I<apostrophes> is 1, single-quotes and apostrophes will be preserved.  This is the default.

If I<apostrophes> is 0 (or NULL), single-quotes and apostrophes are eliminated from values before export, and are therefore lost.

If you want to use the single-quote character in I<quotemark>, then take care that I<apostrophes> is 0.

=back

B<configuration file method>

=over 4

In the configuration file method, I<mode> is B<c>.  Instead of reading options from the command line, they are read from I<configfile>.  The entries in I<configfile> should appear in the same order, using the same syntax, as for the command-line method, except that a newline should terminate each argument (i.e. the arguments should form a list "reading down").

=back

=head1 INTERNALS

Array @config holds the information obtained from the command line or from the configuration file.  The elements of the array are used as follows.  Please see the appropriate entries in section OPTIONS.

$config[0]: I<mode>

$config[1]: I<dbhost>

$config[2]: I<dbport>

$config[3]: I<dbname>

$config[4]: I<dbuser>

$config[5]: I<dbpass>

$config[6]: I<tablename>

$config[7]: I<datafile>

$config[8]: I<mask_or_delimiter>

$config[9]: I<firstrecord>

$config[10]: I<specfile>

$config[11]: I<fieldlist>

$config[12]: I<forcecase>

$config[13]: I<quotemark>

$config[14]: I<apostrophes>

=head1 FILES

=over 4

Please see section OPTIONS for required fields, specifically I<datafile> and I<specfile>.

=back

=head1 DIAGNOSTICS AND GOTCHAS

=over 4

I am too lazy to go over every error message here.  Besides, the ones you will get from STDOUT are descriptive enough.

However, some words of note and caution are in order.

1.  All fields are exported as text.  If you choose a quoting character, all fields will be quoted.

2.  For delimited files, obtaining field names from the database table is NOT the default.  The order is:  specfile, fieldlist, database table.  If you want to use the database table/view field names, make sure I<specfile> and I<fieldlist> are both NULL.

3.  Using a specfile or fieldlist for delimited data is not a bad idea - B<pgexport> checks to make sure the number of fields in the table or view matches the number of fields in your specfile or fieldlist.  A field count is also always performed for fixed-width records.

4.  If you have special data types (i.e. dates, currency) that need special formatting before export, handle that using a database view; B<pgexport> does not perform any data formatting.

5.  If a file exists with the same name as I<datafile>, the program will terminate.

6.  If you are using the program in command-line mode and want to use the double-quote or single-quote character for quoting, you must escape it first with a backslash.  You do not escape characters in configuration file mode.

7.  For those of you on Unixlike systems, you may want to run unix2dos after export before sending the file out into the WinDOS world.
  
=back

=head1 REQUIRES

Perl 5.6 or higher (the script uses the C<warnings> pragma), Text::ParseWords, DBI, DBD::Pg

Term::ReadKey is required for no-echo password entry from the command line when prompted.  If this module is unavailable, the program will still run, but no-echo entry will not be available (you will have to enter your password in clear text from the command line).  See previous entry for I<dbpass>.

=head1 SEE ALSO

pack(), unpack(), Text::ParseWords, DBI, DBD::Pg, Term::ReadKey

=head1 THANKS

Many thanks to the PostgreSQL development team for a database I can live with (meaning inlined functions using PL/Perl), to Larry Wall and his little helpers for the best utility language in the world, and to the contributors to Fedora Core 1.

=head1 AUTHOR

Wayne Matthew Syvinski, matthew@techcelsior.com

=cut


