~olbohlen/oi-userland.git

#!/usr/bin/python2.6
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
#
#
# fetch.py - a file download utility
#
#  A simple program similar to wget(1), but handles local file copy, ignores
#  directories, and verifies file hashes.
#
 
import os
import sys
import shutil
from urllib import splittype
from urllib2 import urlopen
import hashlib
 
def printIOError(e, txt):
    """ Function to decode and print IOError type exception """
    print "I/O Error: " + txt + ": "
    try:
        (code, message) = e
        print str(message) + " (" + str(code) + ")"
    except:
        print str(e)
    
def validate(file, hash):
    algorithm, hashvalue = hash.split(':')
 
    # force migration away from sha1
    if algorithm == "sha1":
        algorithm = "sha256"
    try:
        m = hashlib.new(algorithm)
    except ValueError:
        return False
 
    while True:
        try:
            block = file.read()
        except IOError, err:
            print str(err),
            break
 
        m.update(block)
        if block == '':
            break
 
    return "%s:%s" % (algorithm, m.hexdigest())
 
def validate_container(filename, hash):
    """ Compute the digest of the file itself (the archive container).

    filename -- path of the file to digest
    hash     -- expected value in the form "<algorithm>:<hexdigest>"

    Returns whatever validate() returns for the file's raw bytes, or False
    (after printing the error) when the file cannot be opened.
    """
    try:
        # binary mode: the digest must cover the exact bytes on disk
        container = open(filename, 'rb')
    except IOError as e:
        printIOError(e, "Can't open file " + filename)
        return False

    try:
        return validate(container, hash)
    finally:
        container.close()
 
 
def validate_payload(filename, hash):
    """ Compute the digest of the uncompressed payload of an archive.

    filename -- path of a .bz2, .gz or .tgz file (matched case-insensitively)
    hash     -- expected value in the form "<algorithm>:<hexdigest>"

    Returns whatever validate() returns for the decompressed stream, or
    False when the file name has none of the recognised extensions or the
    archive cannot be opened (the error is printed in that case).
    """
    import re
    import gzip
    import bz2

    expr_bz = re.compile(r'.+\.bz2$', re.IGNORECASE)
    # .gz and .tgz are both gzip streams
    expr_gz = re.compile(r'.+\.t?gz$', re.IGNORECASE)

    try:
        if expr_bz.match(filename):
            payload = bz2.BZ2File(filename, 'r')
        elif expr_gz.match(filename):
            payload = gzip.GzipFile(filename, 'r')
        else:
            return False
    except IOError as e:
        printIOError(e, "Can't open archive " + filename)
        return False

    # try/finally rather than "with": BZ2File and GzipFile are not context
    # managers on Python 2.6.
    try:
        return validate(payload, hash)
    finally:
        payload.close()
 
 
def download(url, filename = None):
    src = None
 
    try:
        src = urlopen(url)
    except IOError as e:
        printIOError(e, "Can't open url " + url)
        return None
 
    # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessfull action
    if src.getcode() and (3 <= int(src.getcode()/100) <= 5):
        print "Error code: " + str(src.getcode())
        return None
 
    if filename == None:
        filename = src.geturl().split('/')[-1]
 
    try:
        dst = open(filename, 'wb');
    except IOError as e:
        printIOError(e, "Can't open file " + filename + " for writing")
        src.close()
        return None
 
    while True:
        block = src.read()
        if block == '':
            break;
        dst.write(block)
 
    src.close()
    dst.close()
 
    # return the name of the file that we downloaded the data to.
    return filename
 
def download_paths(search, filename, url):
    """ Build the ordered list of locations to try fetching a file from.

    search   -- list of base directories/URLs to look in (not modified)
    filename -- local file name requested on the command line, or None
    url      -- fallback URL from the command line, or None

    When filename is given, the result starts with filename itself,
    followed by "<base>/<basename of filename>" for each entry of search
    and then for each whitespace-separated entry of the
    DOWNLOAD_SEARCH_PATH environment variable.  url is appended last
    unless it is None or already present.
    """
    urls = []

    if filename is not None:
        # work on a copy so the caller's list is left untouched
        bases = list(search)

        extra = os.getenv('DOWNLOAD_SEARCH_PATH')
        if extra:
            # split() without an argument ignores repeated/leading/trailing
            # blanks, which would otherwise yield empty base entries
            bases += extra.split()

        basename = os.path.basename(filename)

        urls = [base + '/' + basename for base in bases]

        # filename should always be first
        if filename in urls:
            urls.remove(filename)
        urls.insert(0, filename)

    # command line url is a fallback, so it's last
    if url is not None and url not in urls:
        urls.append(url)

    return urls
 
def usage():
    print "Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] [-s|--search (search-dir)] --url (url)" % (sys.argv[0].split('/')[-1])
    sys.exit(1)
 
def main():
    import getopt
 
    # FLUSH STDOUT 
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
 
    file_arg = None
    link_arg = False
    hash_arg = None
    url_arg = None
    search_list = list()
 
    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:u:",
            ["file=", "link", "hash=", "search=", "url="])
    except getopt.GetoptError, err:
        print str(err)
        usage()
 
    for opt, arg in opts:
        if opt in [ "-f", "--file" ]:
            file_arg = arg
        elif opt in [ "-l", "--link" ]:
            link_arg = True
        elif opt in [ "-h", "--hash" ]:
            hash_arg = arg
        elif opt in [ "-s", "--search" ]:
            search_list.append(arg)
        elif opt in [ "-u", "--url" ]:
            url_arg = arg
        else:
            assert False, "unknown option"
 
    if url_arg == None:
        usage()
 
    for url in download_paths(search_list, file_arg, url_arg):
        print "Source %s..." % url,
 
        scheme, path = splittype(url)
        name = file_arg
 
        if scheme in [ None, 'file' ]:
            if os.path.exists(path) == False:
                print "not found, skipping file copy"
                continue
            elif name != path:
                if link_arg == False:
                    print "\n    copying..."
                    shutil.copy2(path, name)
                else:
                    print "\n    linking..."
                    os.symlink(path, name)
            else:
                pass
        elif scheme in [ 'http', 'https', 'ftp' ]:
            print "\n    downloading...",
            name = download(url, file_arg)
            if name == None:
                print "failed"
                continue
 
        print "\n    validating...",
        if hash_arg == None:
            print "skipping (no hash)"
            sys.exit(0)
            
        realhash = validate_container(name, hash_arg)
        if realhash == hash_arg:
            print "ok"
            sys.exit(0)
        else:
            payloadhash = validate_payload(name, hash_arg)
            if payloadhash == hash_arg:
                print "ok"
                sys.exit(0)
            print "corruption detected"
            print "    expected: %s" % hash_arg
            print "    actual:   %s" % realhash
            print "    payload:  %s" % payloadhash
 
        try:
            os.remove(name)
        except OSError:
            pass
 
    sys.exit(1)
 
# Run the fetch utility only when executed as a script, not when imported.
if __name__ == "__main__":
    main()