[Lsb-messages] /var/www/bzr/lsb/devel/appbat r1005: more Pythonic cleanup
Mats Wichmann
mats at linuxfoundation.org
Fri Dec 30 15:33:27 UTC 2016
------------------------------------------------------------
revno: 1005
committer: Mats Wichmann <mats at linuxfoundation.org>
branch nick: appbat
timestamp: Fri 2016-12-30 08:33:27 -0700
message:
more Pythonic cleanup
modified:
extras/entitycheck.py
-------------- next part --------------
=== modified file 'extras/entitycheck.py'
--- a/extras/entitycheck.py 2016-12-30 00:41:26 +0000
+++ b/extras/entitycheck.py 2016-12-30 15:33:27 +0000
@@ -28,26 +28,33 @@
--dryrun -- test what pkgs would be retrieved
--show-extras -- report on unused pkgs/patches
-u FILE, --updatefile=FILE -- use FILE for pkg locations [{update_file}]
--z URL, --fallback=URL -- use URL as fallback for pkg retrieval
+-z URL, --fallback=URL -- use URL as fallback for pkg retrieval
[{fallback_url}]
--prefer-fallback -- prefer the fallback URL over upstream
-w, --wget -- use wget to fetch files
-h, --help -- print this text and exit
"""
-import sys, re, os, getopt, string, urllib, time, traceback
+import getopt
+import os
+import re
+import string
+import sys
+import time
+import traceback
+import urllib
# Handle hashing module (md5 is the old way, hashlib is the new)
try:
import hashlib
-except:
+except ImportError:
import md5 as hashlib
# PATH DEFAULTS the only stuff that should need tuning
# These are used as globals throughout
sys.path.append('./extras')
try:
- from entitypath import *
+ from entitypath import epaths
except ImportError:
print "Cannot find configuration file <extras/entitypath.py>"
print "Probably need to run ./Configure or ./configure"
@@ -66,46 +73,51 @@
delete_bad = False
prefer_fallback = False
-# this one is for checking in entity.__init__
+# this one is for checking in Entity.__init__
SRE_MATCH_TYPE = type(re.match("", ""))
+
def usage(code, msg=''):
- if msg: print "ERROR:", msg; print
+ """Print usage, possibly with an error message preceding"""
+ if msg:
+ print "ERROR: {}\n".format(msg)
print __doc__.format(**epaths)
sys.exit(code)
-class entity:
- """entity class, instantiate for each entity read from the entity file.
+
+class Entity(object):
+ """Entity class, instantiate for each entity read from the entity file.
If so argument is a match object from re module. However entities are
also built "by hand" from checking extra files, so have to check type
"""
def __init__(self, match):
- if type(match) is SRE_MATCH_TYPE:
+ if isinstance(match, SRE_MATCH_TYPE):
self.name, self.file = match.groups()
else:
self.name, self.file = match
+ self.fullpath = ''
+
def __repr__(self):
- return "entity('{}', '{}')".format(self.name, self.file)
+ return "Entity('{}', '{}')".format(self.name, self.file)
def __str__(self):
- '''entity class "string" is "\tfilename", as that's what some things want to print'''
+ """Entity class "string" is "\tfilename", as that's what some things want to print"""
return '\t{}'.format(self.file)
-
BLOCKSIZE = 1024*1024
def domd5(self):
"""Generate and store md5sum for this entity's filename"""
f = open(self.fullpath, "rb")
- sum = hashlib.md5()
+ cksum = hashlib.md5()
while 1:
- block = f.read(entity.BLOCKSIZE)
+ block = f.read(Entity.BLOCKSIZE)
if not block:
break
- sum.update(block)
+ cksum.update(block)
f.close()
- s = sum.digest()
+ s = cksum.digest()
self.md5sum = "%02x"*len(s) % tuple(map(ord, s))
def delete_file(self):
@@ -128,7 +140,8 @@
def feedback(self, block_count, block_size, total):
"""Callback function for urllib.urlretrieve()"""
so_far = block_count * block_size
- if not so_far: return
+ if not so_far:
+ return
pct = so_far * 100L / total
# Work out remaining time: need transfer rate first
now = time.time()
@@ -136,14 +149,17 @@
rate = so_far / elapsed
left = (total - so_far) / rate
tmp = time.gmtime(left)
- if tmp[3]: timestr = "%2d:" % tmp[3]
- else: timestr = "" # skip hours if zero
+ if tmp[3]:
+ timestr = "%2d:" % tmp[3]
+ else:
+ timestr = "" # skip hours if zero
timestr = "%s%02d:%02d remaining" % (timestr, tmp[4], tmp[5])
self.message = "%d of %d bytes (%d%%) %s" % (so_far, total, pct, timestr)
self.running_output()
def fetch(self, locations, fallback, destination):
- """retrieve the file for an entity.
+ """Retrieve the file for an entity using urllib.
+
Calls urllib.urlretrieve() up to three times to retrieve the
package. Retrieval locations are looked up in 'locations',
the name of a locations instance must match our name, that
@@ -153,7 +169,8 @@
self.message = "not found"
self.front = "%s:" % self.file
for loc in locations:
- if self.name != loc.name: continue
+ if self.name != loc.name:
+ continue
pkgpath = "%s/%s" % (destination, self.file)
print self.front,
sys.stdout.flush() # force text to be displayed
@@ -173,7 +190,8 @@
else:
urls = [loc.path, loc.alternate, fallback]
for url in urls:
- if not url: continue # skip alternate if not defined
+ if not url:
+ continue # skip alternate if not defined
to_get = "%s/%s" % (url, self.file)
try:
@@ -184,25 +202,29 @@
raise IOError
self.message = "completed"
break
- except:
+ except IOError:
sys.stdout.write("\n")
traceback.print_exc()
sys.stderr.write("try %d failed: %s\n" % (numtries, url))
- numtries = numtries + 1
+ numtries += 1
# if it died or was interrupted, remove the partial file
- if os.path.exists(pkgpath): os.remove(pkgpath)
+ if os.path.exists(pkgpath):
+ os.remove(pkgpath)
# failed? just try next url in loop
# if we didn't "break" out of inner loop, the fetch failed
- else: self.message = "retrieval failed"
+ else:
+ self.message = "retrieval failed"
break
self.running_output()
print
- if self.message == "completed": return 1
+ if self.message == "completed":
+ return 1
return 0
def wget(self, locations, fallback, destination):
- """retrieve the file for an entity.
+ """Retrieve the file for an entity using wget.
+
Calls external command wget up to three times to retrieve the
package. Retrieval locations are looked up in 'locations',
the name of a locations instance must match our name, that
@@ -211,7 +233,9 @@
"""
self.message = "not found"
for loc in locations:
- if self.name != loc.name: continue
+ if self.name != loc.name:
+ continue
+ pkgpath = "%s/%s" % (destination, self.file)
if dry_run: # just print a message and bail
print "fetch %s from (%s, ...) to %s" % \
@@ -220,32 +244,35 @@
break
# try up to three times to fetch the file
- dir = os.getcwd()
+ dir_ = os.getcwd()
os.chdir(destination)
if prefer_fallback:
urls = [fallback, loc.path, loc.alternate]
else:
urls = [loc.path, loc.alternate, fallback]
for url in urls:
- if not url: continue # skip alternate if not defined
+ if not url:
+ continue # skip alternate if not defined
to_get = url + os.sep + self.file
try:
handle = os.popen("wget -c " + to_get)
if not handle.close():
self.message = "completed"
- os.chdir(dir)
+ os.chdir(dir_)
return 1
except (IOError, KeyboardInterrupt):
# if it died or was interrupted, remove the partial file
# wget's restartability doesn't help, tool won't detect
# we're a partial file. Try to fix this ...
- if os.path.exists(pkgpath): os.remove(pkgpath)
+ if os.path.exists(pkgpath):
+ os.remove(pkgpath)
# failed? just try next url in loop
else:
self.message = "retrieval failed"
- os.chdir(dir)
+ os.chdir(dir_)
return 0
+
def parse_packages():
"""Look for package files in the entities file. desired lines look like:
<!ENTITY foo-package "foo-1.2.tar.bz2">, but we need to skip XML comments.
@@ -256,14 +283,15 @@
if not re.search('<!--', line):
yield re.search(r'(\S+)-package\s+"([^"]+)"', line)
-def parse_packages_oldPy():
+
+def parse_packages_oldpy():
"""Look for package files in the entities file. desired lines look like:
<!ENTITY foo-package "foo-1.2.tar.bz2">, but we need to skip XML comments.
This version avoids the use of "with" for older Pythons
Returns a match object"""
try:
- entities = open(entity_file)
+ entities = open(epaths['entity_file'])
except IOError, message:
usage(1, 'Cannot open entity file: \n\t%s' % message)
@@ -273,6 +301,7 @@
entities.close()
+
def parse_patches():
"""Look for package files in the entities file. desired lines look like:
<!ENTITY foo-patch "foo-1.2.patch">, but we need to skip XML comments.
@@ -283,14 +312,15 @@
if not re.search('<!--', line):
yield re.search(r'(\S+)-patch\s+"([^"]+)"', line)
-def parse_patches_oldPy():
+
+def parse_patches_oldpy():
"""Look for package files in the entities file. desired lines look like:
<!ENTITY foo-patch "foo-1.2.patch">, but we need to skip XML comments.
This version avoids the use of "with" for older Pythons
Returns a match object"""
try:
- entities = open(entity_file)
+ entities = open(epaths['entity_file'])
except IOError, message:
usage(1, 'Cannot open entity file: \n\t%s' % message)
@@ -300,27 +330,29 @@
entities.close()
+
def parse_entities():
- """Extract package entities and patch entitie from the entity files
+ """Extract package entities and patch entities from the entity files
Returns a tuple containing a list of each.
"""
# hack: we're not parsing the xml, just having the parse functions
# apply a regular expression match, which we hope we can write adequately
# cue stock regular expressions joke ("now you have two problems")
- if sys.version_info >= (2,5):
+ if sys.version_info >= (2, 5):
pkg_parser = parse_packages
pch_parser = parse_patches
else:
- pkg_parser = parse_packages_oldPy
- pch_parser = parse_patches_oldPy
+ pkg_parser = parse_packages_oldpy
+ pch_parser = parse_patches_oldpy
- packages = [entity(match) for match in pkg_parser() if match]
- patches = [entity(match) for match in pch_parser() if match]
+ packages = [Entity(match) for match in pkg_parser() if match]
+ patches = [Entity(match) for match in pch_parser() if match]
return (packages, patches)
-class location:
+
+class Location(object):
"""location instances are created for each line in the locations file"""
def __init__(self, name, path, alternate=None):
self.name = name
@@ -335,11 +367,13 @@
def parse_locations():
- """Parse the locations file. Returns a list of location instances."""
+ """Parse the locations file.
+
+ Returns a list of Location instances."""
#
# The locations file contains lines of the form:
# pkgname url alternate_url // comment
- #
+ #
# alternate_url is used in cases where the package may not stay
# in one place over time (e.g., if it moves to an "old" directory
# when a new version is released)
@@ -351,20 +385,25 @@
locations = []
for line in package_file.readlines():
- if line[0] == '#': continue # '#' is used for an initial comment
+ if line[0] == '#':
+ continue # '#' is used for an initial comment
bits = string.split(line)
if len(bits) < 3 or bits[1] == "none" or bits[1] == "None":
- # skip short lines, or those with no retrieve location
+ # Skip malformed lines:
+ # not enough fields, or with no retrieve location
continue
- if bits[2] == "none" or bits[2] == "None": bits[2] = None
- locations.append(location(bits[0], bits[1], bits[2]))
+ if bits[2] == "none" or bits[2] == "None":
+ bits[2] = None
+ locations.append(Location(bits[0], bits[1], bits[2]))
package_file.close()
return locations
+
def check_missing(path, collection):
- """Scan a collection of entities, returning a tuple
- (list of found entities, list of missing entities).
+ """Scan a collection of entities for missing files.
+
Generate checksums for found entities, if requested.
+ Returns a tuple with lists of (found, missing) entities.
"""
found = []
missing = []
@@ -375,40 +414,51 @@
missing.append(item)
else:
found.append(item)
- if check_sums or generate_sums: item.domd5()
+ if check_sums or generate_sums:
+ item.domd5()
return found, missing
+
def check_extra(path, collection):
"""Check for files in a path that are not described by entities.
+
Creates an entity instance for each and returns a list (this
is to be able to use a common print routine, only the names matter)
"""
paths = dict((item.file, item) for item in collection)
# if we require Python >= 2.7, can use dict comprehension:
- #paths = {item.file:item for item in collection}
- notfound = [entity((None, file)) for file in os.listdir(path) if file not in paths]
+ # paths = {item.file:item for item in collection}
+ notfound = [Entity((None, filename)) for filename in os.listdir(path)
+ if filename not in paths]
return notfound
+
def check_checksums(collection, checksums):
- """Check checksums on entities in collection against 'checksums' dictionary.
- Returns a tuple (entities with bad checksums, missing checksums)
+ """Check checksums on entities in collection.
+
+ Checked against 'checksums' dictionary.
+ Returns a tuple of entities with (bad, missing) checksums.
"""
- badsums = [entity for entity in collection if entity.file in checksums
+ badsums = [entity for entity in collection if entity.file in checksums
and entity.md5sum != checksums[entity.file]]
nosums = [entity for entity in collection if entity.file not in checksums]
return badsums, nosums
+
def dump_coll(collection, msg):
- """Print a collection: use msg and a count to print a header,
- then print the file member of each item.
+ """Print a collection.
+
+ Use msg and item count for header, then print the file member of each item.
"""
if collection:
print msg, len(collection)
for item in collection:
print item
+
def report(fnd_pkg, fnd_pat, miss_pkg, miss_pat, extras):
"""Generate package/patch report.
+
Global "noisy" controls whether there's any output
Return non-zero on fatal error (missing files)
"""
@@ -427,6 +477,7 @@
rv = 1
return rv
+
def sum_report(bad_pkg, bad_pat, no_pkg, no_pat):
"""Generate checksum report
Global "noisy" controls whether there's any output
@@ -443,8 +494,9 @@
rv = 1
return rv
+
def fetch_report(retrieved, failed, missing):
- """Generate file fetch report.
+ """Generate file fetch report.
Global "noisy" controls whether there's any output
Return non-zero on fatal error (failed retrievals).
"""
@@ -455,9 +507,11 @@
dump_coll(failed, "Packages which failed to retrieve:")
dump_coll(missing, "Entities missing:")
- if failed or missing: return 1
+ if failed or missing:
+ return 1
return 0
+
def readmd5():
"""Read and parse a checksum file.
Returns a dictionary of sums indexed by filename.
@@ -469,20 +523,22 @@
checksums = {}
for line in sums.readlines():
- (sum, name) = string.split(line)
- checksums[name] = sum
+ (cksum, name) = string.split(line)
+ checksums[name] = cksum
sums.close()
return checksums
+
def writemd5(collection):
"""Generate a new checksum file from checksums saved in entities. """
sums = open(epaths['md5sum_file'], 'w')
- if noisy:
+ if noisy:
print "writing checksums to {md5sum_file}".format(**epaths)
for entity in collection:
sums.write("%s %s\n" % (entity.md5sum, entity.file))
sums.close()
+
def retrieve_packages(missing_packages):
"""Retrieve packages identified as missing."""
locations = parse_locations()
@@ -495,35 +551,40 @@
# New
if use_wget:
if pkg.wget(locations, epaths['fallback_url'], epaths['package_path']):
- retrieved = retrieved + 1
+ retrieved += 1
else:
if pkg.message == 'retrieval failed':
fails.append(pkg)
if pkg.message == 'not found':
- # should not happen, but we track 'em so we can catch where
- # an entity is defined but not listed in package_locations
+ # Should not happen, but we track so we can
+ # catch where an entity is defined but not listed
+ # in package_locations.
missing.append(pkg)
else:
if pkg.fetch(locations, epaths['fallback_url'], epaths['package_path']):
- retrieved = retrieved + 1
+ retrieved += 1
else:
if pkg.message == 'retrieval failed':
fails.append(pkg)
if pkg.message == 'not found':
- # should not happen, but we track 'em so we can catch where
- # an entity is defined but not listed in package_locations
+ # Should not happen, but we track so we can
+ # catch where an entity is defined but not listed
+ # in package_locations.
missing.append(pkg)
else:
print 'Error: cannot write to package directory ({package_path})'.format(**epaths)
return (retrieved, fails, missing)
+
def delete_bad_checksums(collection):
+ """delete any file with a bad checksum"""
if collection:
for item in collection:
print "Deleting " + item.file
item.delete_file()
-## Main
+
+# Main
# 1. Process command-line arguments
shortopts = 'qe:p:d:gcs:fu:z:wh'
longopts = ['quiet', 'entityfile=', 'packagepath=', 'patchpath=',
@@ -536,12 +597,16 @@
usage(2, msg)
if opts:
- for (opt, arg) in opts:
- if opt in ('--help', '-h'): usage(0)
- if opt in ('--entityfile', '-e'): epaths['entity_file'] = arg
- if opt in ('--packagepath', '-p'): epaths['package_path'] = arg
- if opt in ('--patchpath', '-d'): epaths['patch_path'] = arg
- if opt in ('--gensum', '-g'):
+ for opt, arg in opts:
+ if opt in ('--help', '-h'):
+ usage(0)
+ if opt in ('--entityfile', '-e'):
+ epaths['entity_file'] = arg
+ if opt in ('--packagepath', '-p'):
+ epaths['package_path'] = arg
+ if opt in ('--patchpath', '-d'):
+ epaths['patch_path'] = arg
+ if opt in ('--gensum', '-g'):
if check_sums:
usage(2, "check-sums and generate-sums are mutually exclusive")
generate_sums = 'yes'
@@ -549,19 +614,27 @@
if generate_sums:
usage(2, "check-sums and generate-sums are mutually exclusive")
check_sums = 'yes'
- if opt in ('--sumfile', '-s'): epaths['md5sum_file'] = arg
- if opt in ('--fetch', '-f'): fetch_files = 'yes'
- if opt in ('--updatefile', '-u'): epaths['update_file'] = arg
+ if opt in ('--sumfile', '-s'):
+ epaths['md5sum_file'] = arg
+ if opt in ('--fetch', '-f'):
+ fetch_files = 'yes'
+ if opt in ('--updatefile', '-u'):
+ epaths['update_file'] = arg
if opt == '--dryrun':
dry_run = 'yes'
fetch_files = 'yes'
- if opt in ('--fallback', '-z'): epaths['fallback_url'] = arg
- if opt in ('--wget', '-w'): use_wget = True
- if opt in ('--quiet', '-q'): noisy = False
+ if opt in ('--fallback', '-z'):
+ epaths['fallback_url'] = arg
+ if opt in ('--wget', '-w'):
+ use_wget = True
+ if opt in ('--quiet', '-q'):
+ noisy = False
if opt == '--show-extras':
show_extras = 'yes'
- if opt == '--delete-bad': delete_bad = True
- if opt == '--prefer-fallback': prefer_fallback = True
+ if opt == '--delete-bad':
+ delete_bad = True
+ if opt == '--prefer-fallback':
+ prefer_fallback = True
# 2. Check directories are okay up front
# also saves time to make sure the checksum file is there
@@ -579,8 +652,9 @@
# 4. Scan the package and patch directories for extra files
if epaths['package_path'] == epaths['patch_path']:
- extras = check_extra(epaths['package_path'], found_packages + found_patches)
-else: # packages and patches in separate directories
+ extras = check_extra(epaths['package_path'],
+ found_packages + found_patches)
+else: # packages and patches in separate directories
extras = check_extra(epaths['package_path'], found_packages)
extras = extras + check_extra(epaths['patch_path'], found_patches)
@@ -592,14 +666,12 @@
bad_patches, no_patches = check_checksums(found_patches, checksums)
# tell us what happened
-exitcode = report(found_packages, found_patches,
+exitcode = report(found_packages, found_patches,
missing_packages, missing_patches, extras)
if check_sums:
- exitcode = exitcode + \
- sum_report(bad_packages, bad_patches, no_packages, no_patches)
+ exitcode += sum_report(bad_packages, bad_patches, no_packages, no_patches)
-# (sb) if we are fetching, drop the previous exitcode, otherwise we always exit with an error
# 6. Go fetch missing files if requested, and do another report
if fetch_files:
retrieved = 0
@@ -610,11 +682,12 @@
# if we checked checksums, there may also be pkgs with bad cksums:
if check_sums and bad_packages:
r, f, m = retrieve_packages(bad_packages)
- retrieved = retrieved + r
- fails = fails + f
- missing = missing + m
+ retrieved += r
+ fails += f
+ missing += m
# tell us what happened on the fetch
+ # drop the previous exitcode, otherwise we always exit with an error
exitcode = fetch_report(retrieved, fails, missing)
# 7. Delete files with bad checksums if requested.
More information about the lsb-messages
mailing list