#!/usr/bin/perl -w
#
# Fetch files from NCBI using the Aspera command line tool (ascp), assuming
# it is installed in the standard location (~/.aspera).
#
# Arguments are NCBI ftp URLs (ftp://ftp.ncbi.nih.gov/...). Each URL is
# rewritten to the equivalent Aspera source and handed to ascp. Because ascp
# does not expand wildcards, URLs containing * or ? are first expanded into
# concrete file URLs through a plain FTP directory listing.
#
# Options:
#   -cut-dirs <n>  Recreate the remote directory path below the current
#                  directory, minus its first <n> components, and download
#                  into it. Without this option files go to the current
#                  directory.
#   -mirror        Accepted for compatibility; this script does not read it.
#
# Exits with status 1 on a usage error and dies as soon as any ascp run fails.

use strict;
use warnings;
use Getopt::Long;
use File::Path;
use File::Spec;
use Net::FTP;

my $maxRate   = '600m';
my $asperaURL = 'anonftp@ftp-private.ncbi.nlm.nih.gov:';
my $ftpURL    = 'ftp://ftp.ncbi.nih.gov';

my (%opts);
if (!GetOptions('mirror'     => \$opts{mirror},
                'cut-dirs=i' => \$opts{'cut-dirs'})
    || (@ARGV == 0)
    || (defined $opts{'cut-dirs'} && $opts{'cut-dirs'} < 0)) {
    print "usage: $0 [-cut-dirs <n>] <source> [<source> ...]\n".
          "\n".
          "Source should be NCBI ftp url(s), i.e. ftp://ftp.ncbi.nih.gov/...\n".
          "Wildcards (*, ?) in the url(s) will work as expected.\n".
          "Will transfer files to current directory unless specify -cut-dirs,\n".
          "in which case will build path relative to current directory,\n".
          "minus the cut directories.\n".
          "\n";
    exit 1;
}

# ascp is executed without a shell (list-form system below), so the "~" of
# the standard install location has to be expanded here.
my $home = defined $ENV{HOME} ? $ENV{HOME} : (getpwuid($<))[7];
die "Cannot determine home directory to locate ascp\n" unless defined $home;
my $asperaDir = File::Spec->catdir($home, '.aspera', 'connect');
my $ascp      = File::Spec->catfile($asperaDir, 'bin', 'ascp');
my $ascp_key  = File::Spec->catfile($asperaDir, 'etc', 'asperaweb_id_dsa.openssh');

my @fromURLs = ();
if (grep { /\*|\?/ } @ARGV) {
    # At least one wildcard: aspera doesn't handle globbing, so expand every
    # URL through an FTP listing. The host is taken from the first URL only.
    my ($ftp_site) = $ARGV[0] =~ m{^ftp://(.+?)/};
    die "Cannot determine ftp site from '$ARGV[0]'\n" unless defined $ftp_site;

    # Net::FTP reports constructor failures in $@, not $!.
    my $ftp = Net::FTP->new($ftp_site)
        or die "Could not connect to $ftp_site: $@\n";
    $ftp->login() or die "Cannot login ", $ftp->message;

    foreach my $url (@ARGV) {
        # Path part of the URL, without trailing slashes.
        my ($path) = $url =~ m{^ftp://.+?(/.+?)(?:/*)?$};
        die "Cannot parse ftp path from '$url'\n" unless defined $path;

        my @matches = $ftp->ls($path);
        warn "no files matched $url\n" unless @matches;
        push @fromURLs, map { "ftp://$ftp_site$_" } @matches;
    }
    $ftp->quit;
}
else {
    @fromURLs = @ARGV;
}

foreach my $fromURL (@fromURLs) {
    # Rewrite the public ftp prefix to the aspera source prefix; URLs that
    # are already in aspera form pass through unchanged.
    $fromURL =~ s{^\Q$ftpURL\E}{$asperaURL};

    # Directory part of the remote path (undef for a file directly under /).
    my ($dir) = $fromURL =~ m{^\Q$asperaURL\E/(.+?/)[^/]*$};
    my @dirs = defined $dir ? split(m{/}, $dir) : ();

    my $toURL = '.';
    if (defined $opts{'cut-dirs'}) {
        if (@dirs < $opts{'cut-dirs'}) {
            warn "asked to cut more directories than there are in URL!\n";
            @dirs = ();
        }
        else {
            @dirs = @dirs[$opts{'cut-dirs'} .. $#dirs];
        }

        # When every component was cut, keep downloading into the current
        # directory instead of passing an empty destination to ascp.
        if (@dirs) {
            $toURL = join('/', @dirs);
            File::Path::make_path($toURL);
        }
    }

    # List form: no shell is involved, so spaces or shell metacharacters in
    # remote file names cannot split or inject into the command.
    my @cmd = ($ascp, '-Trp', '-i', $ascp_key, '-Q', "-l$maxRate",
               $fromURL, $toURL);
    my $res = system(@cmd);
    die "aborted download\n" if $res != 0;
}