#!/usr/bin/python2.6
|
#
|
# CDDL HEADER START
|
#
|
# The contents of this file are subject to the terms of the
|
# Common Development and Distribution License (the "License").
|
# You may not use this file except in compliance with the License.
|
#
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
# or http://www.opensolaris.org/os/licensing.
|
# See the License for the specific language governing permissions
|
# and limitations under the License.
|
#
|
# When distributing Covered Code, include this CDDL HEADER in each
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
# If applicable, add the following below this CDDL HEADER, with the
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
#
|
# CDDL HEADER END
|
#
|
# Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
|
#
|
#
|
# fetch.py - a file download utility
|
#
|
# A simple program similar to wget(1), but handles local file copy, ignores
|
# directories, and verifies file hashes.
|
#
|
|
import os
|
import sys
|
import shutil
|
from urllib import splittype
|
from urllib2 import urlopen
|
import hashlib
|
|
def printIOError(e, txt):
    """Print a description of an I/O failure to stdout.

    The first line is "I/O Error: <txt>: ".  The second line is
    "<strerror> (<errno>)" when the exception carries both values, and
    str(e) otherwise (for example an error built from a single message).

    e   -- the exception that was caught (IOError/OSError or a subclass)
    txt -- text describing the operation that failed
    """
    print("I/O Error: " + txt + ": ")

    # Read the errno/strerror attributes instead of unpacking the exception
    # as a tuple inside a bare "except:"; a bare except would also swallow
    # KeyboardInterrupt and SystemExit.
    code = getattr(e, 'errno', None)
    message = getattr(e, 'strerror', None)
    if code is None or message is None:
        print(str(e))
    else:
        print(str(message) + " (" + str(code) + ")")
|
|
def validate(file, hash):
    """Compute the digest of an open file for comparison with *hash*.

    file -- an object whose read(size) method returns the data to hash
    hash -- the expected value, in the form "<algorithm>:<hexdigest>"

    Returns "<algorithm>:<hexdigest>" for the data that was read, or False
    when *hash* has no "<algorithm>:" prefix or names an algorithm that
    hashlib does not provide.  The file is not closed here.
    """
    algorithm, separator = hash.partition(':')[:2]
    if not separator:
        # Not "<algorithm>:<hexdigest>"; nothing sensible to compute.
        return False

    # force migration away from sha1: the digest is computed (and labelled)
    # as sha256, so the result never equals a "sha1:..." expectation.
    if algorithm == "sha1":
        algorithm = "sha256"
    try:
        m = hashlib.new(algorithm)
    except ValueError:
        return False

    # Hash in fixed-size chunks so large files are never held in memory
    # all at once.
    chunk_size = 64 * 1024
    while True:
        try:
            block = file.read(chunk_size)
        except IOError as err:
            # Report the read error inline on the current status line and
            # fall through with the digest of whatever was read so far.
            sys.stdout.write(" " + str(err) + " ")
            break

        # "not block" covers both '' and b'' at end of file.
        if not block:
            break
        m.update(block)

    return "%s:%s" % (algorithm, m.hexdigest())
|
|
def validate_container(filename, hash):
    """Hash the file *filename* exactly as it is stored on disk.

    filename -- path of the file to check
    hash     -- expected value, "<algorithm>:<hexdigest>"

    Returns whatever validate() returns for the file's raw bytes, or False
    (after printing the error) when the file cannot be opened.
    """
    try:
        # Binary mode: the digest must cover the bytes exactly as stored.
        container = open(filename, 'rb')
    except IOError as e:
        printIOError(e, "Can't open file " + filename)
        return False

    # Always release the handle, even if hashing raises.
    try:
        return validate(container, hash)
    finally:
        container.close()
|
|
|
def validate_payload(filename, hash):
    """Hash the decompressed contents of a compressed file.

    Files whose name ends in .bz2 are read through bz2.BZ2File; names
    ending in .gz or .tgz are read through gzip.GzipFile.  The suffix
    comparison ignores case.

    filename -- path of the archive to check
    hash     -- expected value, "<algorithm>:<hexdigest>"

    Returns whatever validate() returns for the decompressed stream, or
    False when the suffix is not recognised or the archive cannot be opened
    (the error is printed in that case).
    """
    import gzip
    import bz2

    lowered = filename.lower()

    try:
        if lowered.endswith('.bz2'):
            payload = bz2.BZ2File(filename, 'r')
        elif lowered.endswith(('.gz', '.tgz')):
            payload = gzip.GzipFile(filename, 'r')
        else:
            return False
    except IOError as e:
        printIOError(e, "Can't open archive " + filename)
        return False

    # Always release the handle, even if hashing raises.
    try:
        return validate(payload, hash)
    finally:
        payload.close()
|
|
|
def download(url, filename = None):
    """Fetch *url* and store the response body in a local file.

    url      -- the URL to fetch with urlopen()
    filename -- where to write the data; when None, the last path component
                of the (possibly redirected) URL is used

    Returns the name of the file that was written, or None when the URL
    cannot be opened, the server reports a 3xx/4xx/5xx status, or the
    output file cannot be opened for writing.  A message is printed in each
    of those cases.
    """
    try:
        src = urlopen(url)
    except IOError as e:
        printIOError(e, "Can't open url " + url)
        return None

    # From here on the connection is open; the outer "finally" closes it on
    # every exit path, including the early returns.
    try:
        # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessful action
        code = src.getcode()
        if code and (3 <= int(code) // 100 <= 5):
            print("Error code: " + str(code))
            return None

        if filename is None:
            filename = src.geturl().split('/')[-1]

        try:
            dst = open(filename, 'wb')
        except IOError as e:
            printIOError(e, "Can't open file " + filename + " for writing")
            return None

        # Copy in fixed-size chunks so the whole response is never held in
        # memory at once; close the output even if a read or write fails.
        chunk_size = 64 * 1024
        try:
            while True:
                block = src.read(chunk_size)
                if not block:
                    break
                dst.write(block)
        finally:
            dst.close()
    finally:
        src.close()

    # return the name of the file that we downloaded the data to.
    return filename
|
|
def download_paths(search, filename, url):
    """Build the ordered list of locations to try for a file.

    search   -- list of base directories/URLs to look under
    filename -- local file name requested by the caller, or None
    url      -- URL given on the command line, or None

    When *filename* is given, the result starts with *filename* itself,
    followed by "<base>/<basename of filename>" for every entry of *search*
    and then for every whitespace-separated entry of the
    DOWNLOAD_SEARCH_PATH environment variable.  *url* is appended last
    unless it is already present.  *search* itself is not modified.
    """
    urls = list()

    if filename is not None:
        # Work on a copy so the caller's list is not extended in place.
        bases = list(search)

        tmp = os.getenv('DOWNLOAD_SEARCH_PATH')
        if tmp:
            # split() without an argument drops the empty entries that
            # repeated, leading or trailing blanks would otherwise produce
            # (an empty base would turn into "/<file>").
            bases += tmp.split()

        basename = os.path.basename(filename)

        urls = [base + '/' + basename for base in bases]

        # filename should always be first
        if filename in urls:
            urls.remove(filename)
        urls.insert(0, filename)

    # command line url is a fallback, so it's last
    if url is not None and url not in urls:
        urls.append(url)

    return urls
|
|
def usage():
    """Print the command-line synopsis to stdout and exit with status 1."""
    program = sys.argv[0].split('/')[-1]
    # The synopsis lists -u as well as --url: both are accepted by the
    # option parser in main().
    print("Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] "
          "[-s|--search (search-dir)] -u|--url (url)" % program)
    sys.exit(1)
|
|
def main():
|
import getopt
|
|
# FLUSH STDOUT
|
sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
|
|
file_arg = None
|
link_arg = False
|
hash_arg = None
|
url_arg = None
|
search_list = list()
|
|
try:
|
opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:u:",
|
["file=", "link", "hash=", "search=", "url="])
|
except getopt.GetoptError, err:
|
print str(err)
|
usage()
|
|
for opt, arg in opts:
|
if opt in [ "-f", "--file" ]:
|
file_arg = arg
|
elif opt in [ "-l", "--link" ]:
|
link_arg = True
|
elif opt in [ "-h", "--hash" ]:
|
hash_arg = arg
|
elif opt in [ "-s", "--search" ]:
|
search_list.append(arg)
|
elif opt in [ "-u", "--url" ]:
|
url_arg = arg
|
else:
|
assert False, "unknown option"
|
|
if url_arg == None:
|
usage()
|
|
for url in download_paths(search_list, file_arg, url_arg):
|
print "Source %s..." % url,
|
|
scheme, path = splittype(url)
|
name = file_arg
|
|
if scheme in [ None, 'file' ]:
|
if os.path.exists(path) == False:
|
print "not found, skipping file copy"
|
continue
|
elif name != path:
|
if link_arg == False:
|
print "\n copying..."
|
shutil.copy2(path, name)
|
else:
|
print "\n linking..."
|
os.symlink(path, name)
|
else:
|
pass
|
elif scheme in [ 'http', 'https', 'ftp' ]:
|
print "\n downloading...",
|
name = download(url, file_arg)
|
if name == None:
|
print "failed"
|
continue
|
|
print "\n validating...",
|
if hash_arg == None:
|
print "skipping (no hash)"
|
sys.exit(0)
|
|
realhash = validate_container(name, hash_arg)
|
if realhash == hash_arg:
|
print "ok"
|
sys.exit(0)
|
else:
|
payloadhash = validate_payload(name, hash_arg)
|
if payloadhash == hash_arg:
|
print "ok"
|
sys.exit(0)
|
print "corruption detected"
|
print " expected: %s" % hash_arg
|
print " actual: %s" % realhash
|
print " payload: %s" % payloadhash
|
|
try:
|
os.remove(name)
|
except OSError:
|
pass
|
|
sys.exit(1)
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|