[Lsb-messages] /var/www/bzr/lsb/devel/appbat r1005: more Pythonic cleanup
Mats Wichmann
mats at linuxfoundation.org
Fri Dec 30 15:33:27 UTC 2016
------------------------------------------------------------
revno: 1005
committer: Mats Wichmann <mats at linuxfoundation.org>
branch nick: appbat
timestamp: Fri 2016-12-30 08:33:27 -0700
message:
more Pythonic cleanup
modified:
extras/entitycheck.py
-------------- next part --------------
=== modified file 'extras/entitycheck.py'
--- a/extras/entitycheck.py 2016-12-30 00:41:26 +0000
+++ b/extras/entitycheck.py 2016-12-30 15:33:27 +0000
@@ -28,26 +28,33 @@
--dryrun -- test what pkgs would be retrieved
--show-extras -- report on unused pkgs/patches
-u FILE, --updatefile=FILE -- use FILE for pkg locations [{update_file}]
--z URL, --fallback=URL -- use URL as fallback for pkg retrieval
+-z URL, --fallback=URL -- use URL as fallback for pkg retrieval
[{fallback_url}]
--prefer-fallback -- prefer the fallback URL over upstream
-w, --wget -- use wget to fetch files
-h, --help -- print this text and exit
"""
-import sys, re, os, getopt, string, urllib, time, traceback
+import getopt
+import os
+import re
+import string
+import sys
+import time
+import traceback
+import urllib
# Handle hashing module (md5 is the old way, hashlib is the new)
try:
import hashlib
-except:
+except ImportError:
import md5 as hashlib
# PATH DEFAULTS the only stuff that should need tuning
# These are used as globals throughout
sys.path.append('./extras')
try:
- from entitypath import *
+ from entitypath import epaths
except ImportError:
print "Cannot find configuration file <extras/entitypath.py>"
print "Probably need to run ./Configure or ./configure"
@@ -66,46 +73,51 @@
delete_bad = False
prefer_fallback = False
-# this one is for checking in entity.__init__
+# this one is for checking in Entity.__init__
SRE_MATCH_TYPE = type(re.match("", ""))
+
def usage(code, msg=''):
- if msg: print "ERROR:", msg; print
+ """Print usage, possibly with an error message preceding"""
+ if msg:
+ print "ERROR: {}\n".format(msg)
print __doc__.format(**epaths)
sys.exit(code)
-class entity:
- """entity class, instantiate for each entity read from the entity file.
+
+class Entity(object):
+ """Entity class, instantiate for each entity read from the entity file.
If so argument is a match object from re module. However entities are
also built "by hand" from checking extra files, so have to check type
"""
def __init__(self, match):
- if type(match) is SRE_MATCH_TYPE:
+ if isinstance(match, SRE_MATCH_TYPE):
self.name, self.file = match.groups()
else:
self.name, self.file = match
+ self.fullpath = ''
+
def __repr__(self):
- return "entity('{}', '{}')".format(self.name, self.file)
+ return "Entity('{}', '{}')".format(self.name, self.file)
def __str__(self):
- '''entity class "string" is "\tfilename", as that's what some things want to print'''
+ """Entity class "string" is "\tfilename", as that's what some things want to print"""
return '\t{}'.format(self.file)
-
BLOCKSIZE = 1024*1024
def domd5(self):
"""Generate and store md5sum for this entity's filename"""
f = open(self.fullpath, "rb")
- sum = hashlib.md5()
+ cksum = hashlib.md5()
while 1:
- block = f.read(entity.BLOCKSIZE)
+ block = f.read(Entity.BLOCKSIZE)
if not block:
break
- sum.update(block)
+ cksum.update(block)
f.close()
- s = sum.digest()
+ s = cksum.digest()
self.md5sum = "%02x"*len(s) % tuple(map(ord, s))
def delete_file(self):
@@ -128,7 +140,8 @@
def feedback(self, block_count, block_size, total):
"""Callback function for urllib.urlretrieve()"""
so_far = block_count * block_size
- if not so_far: return
+ if not so_far:
+ return
pct = so_far * 100L / total
# Work out remaining time: need transfer rate first
now = time.time()
@@ -136,14 +149,17 @@
rate = so_far / elapsed
left = (total - so_far) / rate
tmp = time.gmtime(left)
- if tmp[3]: timestr = "%2d:" % tmp[3]
- else: timestr = "" # skip hours if zero
+ if tmp[3]:
+ timestr = "%2d:" % tmp[3]
+ else:
+ timestr = "" # skip hours if zero
timestr = "%s%02d:%02d remaining" % (timestr, tmp[4], tmp[5])
self.message = "%d of %d bytes (%d%%) %s" % (so_far, total, pct, timestr)
self.running_output()
def fetch(self, locations, fallback, destination):
- """retrieve the file for an entity.
+ """Retrieve the file for an entity using urllib.
+
Calls urllib.urlretrieve() up to three times to retrieve the
package. Retrieval locations are looked up in 'locations',
the name of a locations instance must match our name, that
@@ -153,7 +169,8 @@
self.message = "not found"
self.front = "%s:" % self.file
for loc in locations:
- if self.name != loc.name: continue
+ if self.name != loc.name:
+ continue
pkgpath = "%s/%s" % (destination, self.file)
print self.front,
sys.stdout.flush() # force text to be displayed
@@ -173,7 +190,8 @@
else:
urls = [loc.path, loc.alternate, fallback]
for url in urls:
- if not url: continue # skip alternate if not defined
+ if not url:
+ continue # skip alternate if not defined
to_get = "%s/%s" % (url, self.file)
try:
@@ -184,25 +202,29 @@
raise IOError
self.message = "completed"
break
- except:
+ except IOError:
sys.stdout.write("\n")
traceback.print_exc()
sys.stderr.write("try %d failed: %s\n" % (numtries, url))
- numtries = numtries + 1
+ numtries += 1
# if it died or was interrupted, remove the partial file
- if os.path.exists(pkgpath): os.remove(pkgpath)
+ if os.path.exists(pkgpath):
+ os.remove(pkgpath)
# failed? just try next url in loop
# if we didn't "break" out of inner loop, the fetch failed
- else: self.message = "retrieval failed"
+ else:
+ self.message = "retrieval failed"
break
self.running_output()
print
- if self.message == "completed": return 1
+ if self.message == "completed":
+ return 1
return 0
def wget(self, locations, fallback, destination):
- """retrieve the file for an entity.
+ """Retrieve the file for an entity using wget.
+
Calls external command wget up to three times to retrieve the
package. Retrieval locations are looked up in 'locations',
the name of a locations instance must match our name, that
@@ -211,7 +233,9 @@
"""
self.message = "not found"
for loc in locations:
- if self.name != loc.name: continue
+ if self.name != loc.name:
+ continue
+ pkgpath = "%s/%s" % (destination, self.file)
if dry_run: # just print a message and bail
print "fetch %s from (%s, ...) to %s" % \
@@ -220,32 +244,35 @@
break
# try up to three times to fetch the file
- dir = os.getcwd()
+ dir_ = os.getcwd()
os.chdir(destination)
if prefer_fallback:
urls = [fallback, loc.path, loc.alternate]
else:
urls = [loc.path, loc.alternate, fallback]
for url in urls:
- if not url: continue # skip alternate if not defined
+ if not url:
+ continue # skip alternate if not defined
to_get = url + os.sep + self.file
try:
handle = os.popen("wget -c " + to_get)
if not handle.close():
self.message = "completed"
- os.chdir(dir)
+ os.chdir(dir_)
return 1
except (IOError, KeyboardInterrupt):
# if it died or was interrupted, remove the partial file
# wget's restartability doesn't help, tool won't detect
# we're a partial file. Try to fix this ...
- if os.path.exists(pkgpath): os.remove(pkgpath)
+ if os.path.exists(pkgpath):
+ os.remove(pkgpath)
# failed? just try next url in loop
else:
self.message = "retrieval failed"
- os.chdir(dir)
+ os.chdir(dir_)
return 0
+
def parse_packages():
"""Look for package files in the entities file. desired lines look like:
<!ENTITY foo-package "foo-1.2.tar.bz2">, but we need to skip XML comments.
@@ -256,14 +283,15 @@
if not re.search('<!--', line):
yield re.search(r'(\S+)-package\s+"([^"]+)"', line)
-def parse_packages_oldPy():
+
+def parse_packages_oldpy():
"""Look for package files in the entities file. desired lines look like:
<!ENTITY foo-package "foo-1.2.tar.bz2">, but we need to skip XML comments.
This version avoids the use of "with" for older Pythons
Returns a match object"""
try:
- entities = open(entity_file)
+ entities = open(epaths['entity_file'])
except IOError, message:
usage(1, 'Cannot open entity file: \n\t%s' % message)
@@ -273,6 +301,7 @@
entities.close()
+
def parse_patches():
"""Look for package files in the entities file. desired lines look like:
<!ENTITY foo-patch "foo-1.2.patch">, but we need to skip XML comments.
@@ -283,14 +312,15 @@
if not re.search('<!--', line):
yield re.search(r'(\S+)-patch\s+"([^"]+)"', line)
-def parse_patches_oldPy():
+
+def parse_patches_oldpy():
"""Look for package files in the entities file. desired lines look like:
<!ENTITY foo-patch "foo-1.2.patch">, but we need to skip XML comments.
This version avoids the use of "with" for older Pythons
Returns a match object"""
try:
- entities = open(entity_file)
+ entities = open(epaths['entity_file'])
except IOError, message:
usage(1, 'Cannot open entity file: \n\t%s' % message)
@@ -300,27 +330,29 @@
entities.close()
+
def parse_entities():
- """Extract package entities and patch entitie from the entity files
+ """Extract package entities and patch entities from the entity files
Returns a tuple containing a list of each.
"""
# hack: we're not parsing the xml, just having the parse functions
# apply a regular expression match, which we hope we can write adequately
# cue stock regular expressions joke ("now you have two problems")
- if sys.version_info >= (2,5):
+ if sys.version_info >= (2, 5):
pkg_parser = parse_packages
pch_parser = parse_patches
else:
- pkg_parser = parse_packages_oldPy
- pch_parser = parse_patches_oldPy
+ pkg_parser = parse_packages_oldpy
+ pch_parser = parse_patches_oldpy
- packages = [entity(match) for match in pkg_parser() if match]
- patches = [entity(match) for match in pch_parser() if match]
+ packages = [Entity(match) for match in pkg_parser() if match]
+ patches = [Entity(match) for match in pch_parser() if match]
return (packages, patches)
-class location:
+
+class Location(object):
"""location instances are created for each line in the locations file"""
def __init__(self, name, path, alternate=None):
self.name = name
@@ -335,11 +367,13 @@
def parse_locations():
- """Parse the locations file. Returns a list of location instances."""
+ """Parse the locations file.
+
+ Returns a list of Location instances."""
#
# The locations file contains lines of the form:
# pkgname url alternate_url // comment
- #
+ #
# alternate_url is used in cases where the package may not stay
# in one place over time (e.g., if it moves to an "old" directory
# when a new version is released)
@@ -351,20 +385,25 @@
locations = []
for line in package_file.readlines():
- if line[0] == '#': continue # '#' is used for an initial comment
+ if line[0] == '#':
+ continue # '#' is used for an initial comment
bits = string.split(line)
if len(bits) < 3 or bits[1] == "none" or bits[1] == "None":
- # skip short lines, or those with no retrieve location
+ # Skip malformed lines:
+ # not enough fields, or with no retrieve location
continue
- if bits[2] == "none" or bits[2] == "None": bits[2] = None
- locations.append(location(bits[0], bits[1], bits[2]))
+ if bits[2] == "none" or bits[2] == "None":
+ bits[2] = None
+ locations.append(Location(bits[0], bits[1], bits[2]))
package_file.close()
return locations
+
def check_missing(path, collection):
- """Scan a collection of entities, returning a tuple
- (list of found entities, list of missing entities).
+ """Scan a collection of entities for missing files.
+
Generate checksums for found entities, if requested.
+ Returns a tuple with lists of (found, missing) entities.
"""
found = []
missing = []
@@ -375,40 +414,51 @@
missing.append(item)
else:
found.append(item)
- if check_sums or generate_sums: item.domd5()
+ if check_sums or generate_sums:
+ item.domd5()
return found, missing
+
def check_extra(path, collection):
"""Check for files in a path that are not described by entities.
+
Creates an entity instance for each and returns a list (this
is to be able to use a common print routine, only the names matter)
"""
paths = dict((item.file, item) for item in collection)
# if we require Python >= 2.7, can use dict comprehension:
- #paths = {item.file:item for item in collection}
- notfound = [entity((None, file)) for file in os.listdir(path) if file not in paths]
+ # paths = {item.file:item for item in collection}
+ notfound = [Entity((None, filename)) for filename in os.listdir(path)
+ if filename not in paths]
return notfound
+
def check_checksums(collection, checksums):
- """Check checksums on entities in collection against 'checksums' dictionary.
- Returns a tuple (entities with bad checksums, missing checksums)
+ """Check checksums on entities in collection.
+
+ Checked against 'checksums' dictionary.
+ Returns a tuple of entities with (bad, missing) checksums.
"""
- badsums = [entity for entity in collection if entity.file in checksums
+ badsums = [entity for entity in collection if entity.file in checksums
and entity.md5sum != checksums[entity.file]]
nosums = [entity for entity in collection if entity.file not in checksums]
return badsums, nosums
+
def dump_coll(collection, msg):
- """Print a collection: use msg and a count to print a header,
- then print the file member of each item.
+ """Print a collection.
+
+ Use msg and item count for header, then print the file member of each item.
"""
if collection:
print msg, len(collection)
for item in collection:
print item
+
def report(fnd_pkg, fnd_pat, miss_pkg, miss_pat, extras):
"""Generate package/patch report.
+
Global "noisy" controls whether there's any output
Return non-zero on fatal error (missing files)
"""
@@ -427,6 +477,7 @@
rv = 1
return rv
+
def sum_report(bad_pkg, bad_pat, no_pkg, no_pat):
"""Generate checksum report
Global "noisy" controls whether there's any output
@@ -443,8 +494,9 @@
rv = 1
return rv
+
def fetch_report(retrieved, failed, missing):
- """Generate file fetch report.
+ """Generate file fetch report.
Global "noisy" controls whether there's any output
Return non-zero on fatal error (failed retrievals).
"""
@@ -455,9 +507,11 @@
dump_coll(failed, "Packages which failed to retrieve:")
dump_coll(missing, "Entities missing:")
- if failed or missing: return 1
+ if failed or missing:
+ return 1
return 0
+
def readmd5():
"""Read and parse a checksum file.
Returns a dictionary of sums indexed by filename.
@@ -469,20 +523,22 @@
checksums = {}
for line in sums.readlines():
- (sum, name) = string.split(line)
- checksums[name] = sum
+ (cksum, name) = string.split(line)
+ checksums[name] = cksum
sums.close()
return checksums
+
def writemd5(collection):
"""Generate a new checksum file from checksums saved in entities. """
sums = open(epaths['md5sum_file'], 'w')
- if noisy:
+ if noisy:
print "writing checksums to {md5sum_file}".format(**epaths)
for entity in collection:
sums.write("%s %s\n" % (entity.md5sum, entity.file))
sums.close()
+
def retrieve_packages(missing_packages):
"""Retrieve packages identified as missing."""
locations = parse_locations()
@@ -495,35 +551,40 @@
# New
if use_wget:
if pkg.wget(locations, epaths['fallback_url'], epaths['package_path']):
- retrieved = retrieved + 1
+ retrieved += 1
else:
if pkg.message == 'retrieval failed':
fails.append(pkg)
if pkg.message == 'not found':
- # should not happen, but we track 'em so we can catch where
- # an entity is defined but not listed in package_locations
+ # Should not happen, but we track so we can
+ # catch where an entity is defined but not listed
+ # in package_locations.
missing.append(pkg)
else:
if pkg.fetch(locations, epaths['fallback_url'], epaths['package_path']):
- retrieved = retrieved + 1
+ retrieved += 1
else:
if pkg.message == 'retrieval failed':
fails.append(pkg)
if pkg.message == 'not found':
- # should not happen, but we track 'em so we can catch where
- # an entity is defined but not listed in package_locations
+ # Should not happen, but we track so we can
+ # catch where an entity is defined but not listed
+ # in package_locations.
missing.append(pkg)
else:
print 'Error: cannot write to package directory ({package_path})'.format(**epaths)
return (retrieved, fails, missing)
+
def delete_bad_checksums(collection):
+ """delete any file with a bad checksum"""
if collection:
for item in collection:
print "Deleting " + item.file
item.delete_file()
-## Main
+
+# Main
# 1. Process command-line arguments
shortopts = 'qe:p:d:gcs:fu:z:wh'
longopts = ['quiet', 'entityfile=', 'packagepath=', 'patchpath=',
@@ -536,12 +597,16 @@
usage(2, msg)
if opts:
- for (opt, arg) in opts:
- if opt in ('--help', '-h'): usage(0)
- if opt in ('--entityfile', '-e'): epaths['entity_file'] = arg
- if opt in ('--packagepath', '-p'): epaths['package_path'] = arg
- if opt in ('--patchpath', '-d'): epaths['patch_path'] = arg
- if opt in ('--gensum', '-g'):
+ for opt, arg in opts:
+ if opt in ('--help', '-h'):
+ usage(0)
+ if opt in ('--entityfile', '-e'):
+ epaths['entity_file'] = arg
+ if opt in ('--packagepath', '-p'):
+ epaths['package_path'] = arg
+ if opt in ('--patchpath', '-d'):
+ epaths['patch_path'] = arg
+ if opt in ('--gensum', '-g'):
if check_sums:
usage(2, "check-sums and generate-sums are mutually exclusive")
generate_sums = 'yes'
@@ -549,19 +614,27 @@
if generate_sums:
usage(2, "check-sums and generate-sums are mutually exclusive")
check_sums = 'yes'
- if opt in ('--sumfile', '-s'): epaths['md5sum_file'] = arg
- if opt in ('--fetch', '-f'): fetch_files = 'yes'
- if opt in ('--updatefile', '-u'): epaths['update_file'] = arg
+ if opt in ('--sumfile', '-s'):
+ epaths['md5sum_file'] = arg
+ if opt in ('--fetch', '-f'):
+ fetch_files = 'yes'
+ if opt in ('--updatefile', '-u'):
+ epaths['update_file'] = arg
if opt == '--dryrun':
dry_run = 'yes'
fetch_files = 'yes'
- if opt in ('--fallback', '-z'): epaths['fallback_url'] = arg
- if opt in ('--wget', '-w'): use_wget = True
- if opt in ('--quiet', '-q'): noisy = False
+ if opt in ('--fallback', '-z'):
+ epaths['fallback_url'] = arg
+ if opt in ('--wget', '-w'):
+ use_wget = True
+ if opt in ('--quiet', '-q'):
+ noisy = False
if opt == '--show-extras':
show_extras = 'yes'
- if opt == '--delete-bad': delete_bad = True
- if opt == '--prefer-fallback': prefer_fallback = True
+ if opt == '--delete-bad':
+ delete_bad = True
+ if opt == '--prefer-fallback':
+ prefer_fallback = True
# 2. Check directories are okay up front
# also saves time to make sure the checksum file is there
@@ -579,8 +652,9 @@
# 4. Scan the package and patch directories for extra files
if epaths['package_path'] == epaths['patch_path']:
- extras = check_extra(epaths['package_path'], found_packages + found_patches)
-else: # packages and patches in separate directories
+ extras = check_extra(epaths['package_path'],
+ found_packages + found_patches)
+else: # packages and patches in separate directories
extras = check_extra(epaths['package_path'], found_packages)
extras = extras + check_extra(epaths['patch_path'], found_patches)
@@ -592,14 +666,12 @@
bad_patches, no_patches = check_checksums(found_patches, checksums)
# tell us what happened
-exitcode = report(found_packages, found_patches,
+exitcode = report(found_packages, found_patches,
missing_packages, missing_patches, extras)
if check_sums:
- exitcode = exitcode + \
- sum_report(bad_packages, bad_patches, no_packages, no_patches)
+ exitcode += sum_report(bad_packages, bad_patches, no_packages, no_patches)
-# (sb) if we are fetching, drop the previous exitcode, otherwise we always exit with an error
# 6. Go fetch missing files if requested, and do another report
if fetch_files:
retrieved = 0
@@ -610,11 +682,12 @@
# if we checked checksums, there may also be pkgs with bad cksums:
if check_sums and bad_packages:
r, f, m = retrieve_packages(bad_packages)
- retrieved = retrieved + r
- fails = fails + f
- missing = missing + m
+ retrieved += r
+ fails += f
+ missing += m
# tell us what happened on the fetch
+ # drop the previous exitcode, otherwise we always exit with an error
exitcode = fetch_report(retrieved, fails, missing)
# 7. Delete files with bad checksums if requested.
More information about the lsb-messages
mailing list