#!/usr/bin/perl -w # MySQL to PostgreSQL dump file converter # # For usage: mysqldump --help # # Convert mysqldump file (from MySQL) to something readable by psql ! # # homepage: http://www.rot13.org/~dpavlin/projects.html # 1999-12-15 DbP -- Dobrica Pavlinusic # 1999-12-26 DbP don't make serial from auto_increment, create all manually # (to set start value right) # 2000-01-11 DbP now creates sequences with correct value # 2000-04-25 DbP import into CVS (at cvs.linux.hr) # 2001-01-29 tpo -- Tomas Pospisek : # 1) make script comply to usage: # 2) make script output to STDOUT instead of STERR # 3) change verbosity behaveour # 4) add debug option # see rest of changelog at http://cvs.linux.hr/cvsweb.cgi/sql/mysql2pgsql use DBI; $DEBUG =0; $VERBOSE =0; sub usage { print "Usage: $0 [-v|--h] mysql_dump_file [pg_database_name]\n\n"; print "\t* mysql_dump_file is the filename of the mysql dump.\n"; print "\t* pg_database_name is the name of the postgres database.\n"; print "\t When ommitted standard output is used.\n"; print "\t Combined with -v, it will tell you what SQL\n"; print "\t commands are being executed\n"; } # if mysql_dump_file is not suplied or --h is supplied then we dump usage info if (! defined($ARGV[0]) || $ARGV[0] eq "--h") { usage(); exit 1; } if ($ARGV[0] eq "-v") { $VERBOSE = 1; # if verbose is set then user has to supply both arguments if(! defined($ARGV[1]) || ! defined($ARGV[2])) { usage(); exit 1; } else { $dump="$ARGV[1]"; $database="$ARGV[1]"; } } else { $dump="$ARGV[0]"; # if database name is supplied then use it if (defined($ARGV[1])) { $database="$ARGV[1]"; # else be verbose } else { $database=""; $VERBOSE = 1; } } if ($database) { my $dbh = DBI->connect("DBI:Pg:dbname=template1","","") || die $DBI::errstr; $dbh->do("create database $database") || die $dbh->errstr(); $dbh->disconnect; $dbh = DBI->connect("DBI:Pg:dbname=$database","","") || die $DBI::errstr; } $create=0; # inside create table? $table=""; open(DUMP,"$dump") || die "can't open dump file $dump"; while() { chomp; s/\r//g; print "Processing line: $_\n" if $DEBUG; # nuke comments or empty lines next if (/^#/ || /^$/); if ($create && /^\);/i) { # end of create table squence $create=0; $sql =~ s/,$//g; # strip last , inside create table } if ($create) { # are we inside create table? # int,auto_increment -> serial if (/int.*auto_increment/i) { # this was simple solution, but squence isn't # initialized correctly so I have to do a work-around # # s/\w*int.*auto_increment/serial/ig; if (/^\s*(\w+)\s+/) { $seq="${table}_${1}_seq"; push @sequences,"$table.$1"; s/(\s+)\w*int.*auto_increment[^,]*/$1int4 default nextval('$seq') not null/ig; } else { die "can't get name of field!"; } # int type conversion } elsif (/(\w*)int\(\d+\)/i) { $size=$1; $size =~ tr [A-Z] [a-z]; if ($size eq "tiny" || $size eq "small") { $out = "int2"; } elsif ($size eq "big") { $out = "int8"; } else { $out = "int4"; } s/\w*int\(\d+\)/$out/gc; } # nuke int unsigned s/(int\w+)\s+unsigned/$1/gi; # blob -> text s/\w*blob/text/gi; # tinytext/mediumtext -> text s/tinytext/text/gi; s/mediumtext/text/gi; # char -> varchar # PostgreSQL would otherwise pad with spaces as opposed # to MySQL! Your user interface may depend on this! s/\s+char/ varchar/gi; # nuke date representation (not supported in PostgreSQL) s/datetime default '[^']+'/datetime/i; s/date default '[^']+'/datetime/i; s/time default '[^']+'/datetime/i; # change not null datetime field to null valid ones # (to support remapping of "zero time" to null s/datetime not null/datetime/i; # nuke size of timestamp s/timestamp\([^)]*\)/timestamp/i; # double -> float8 s/double\([^)]*\)/float8/i; # add unique to definition of type (MySQL separates this) if (/unique \w+ \((\w+)\)/i) { $sql=~s/($1)([^,]+)/$1$2 unique/i; next; } # FIX: unique for multipe columns (col1,col2) are unsupported! next if (/unique/i); # FIX: nuke keys next if (/^\s+key/i && !/^\s+primary key/i); # quote column names s/(^\s*)(\S+)(\s*)/$1"$2"$3/gi if (!/key/i); # remap colums with names of existing system attribute if (/"oid"/i) { s/"oid"/"_oid"/g; print STDERR "WARNING: table $table uses column \"oid\" which is renamed to \"_oid\"\nYou should fix application manually! Press return to continue."; my $wait=; } s/oid/_oid/i if (/key/i && /oid/i); # fix oid in key } else { # not inside create table #---- fix data in inserted data: (from MS world) # FIX: disabled for now if (00 && /insert into/i) { s!\x96!-!g; # -- s!\x93!"!g; # `` s!\x94!"!g; # '' s!\x85!... !g; # \ldots s!\x92!`!g; } # fix dates '0000-00-00 00:00:00' (should be null) s/'0000-00-00 00:00:00'/null/gi; s/'0000-00-00'/null/gi; s/'00:00:00'/null/gi; s/([12]\d\d\d)([01]\d)([0-3]\d)([0-2]\d)([0-6]\d)([0-6]\d)/'$1-$2-$3 $4:$5:$6'/; # protect ; in inserts while (/('[^']*);([^']*)'/) { s/('[^']*);([^']*')/$1 _dotcol_ $2/g; } } $sql.="$_"; if (/create table/i) { $create++; /create table (\w+)/i; $table=$1 if (defined($1)); } if ($sql=~/\);/) { ($dosql,$sql)=split(/\);/,$sql); $dosql.=");"; # nuked by split, put it back! if ("$dosql" ne "") { $dosql=~s/ _dotcol_ /;/g; print "$dosql\n" if $VERBOSE; if($database) { $dbh->do("$dosql") || die "do: '$dosql' ",$dbh->errstr(); } } else { print STDERR "empty sql!\n"; } } } #print "creating sequences: @sequences\n"; foreach $seq (@sequences) { ($table,$field) = split(/\./,$seq); $sql="select max($field)+1 from $table"; print "$sql\n" if $VERBOSE; if($database){ $sth = $dbh->prepare($sql) || die $dbh->errstr(); $sth->execute() || die $sth->errstr(); ($start) = $sth->fetchrow_array() || 1; } else { print STDERR<do("create sequence $seq start $start increment 1") || die $dbh->errstr(); } } print "\n";