[Lsb-messages] /var/www/bzr/lsb/devel/appbat r1009: assorted entity checker tweaks
Mats Wichmann
mats at linuxfoundation.org
Sat Dec 31 02:05:54 UTC 2016
------------------------------------------------------------
revno: 1009
committer: Mats Wichmann <mats at linuxfoundation.org>
branch nick: appbat
timestamp: Fri 2016-12-30 19:05:54 -0700
message:
assorted entity checker tweaks
modified:
extras/entitycheck.py
-------------- next part --------------
=== modified file 'extras/entitycheck.py'
--- a/extras/entitycheck.py 2016-12-30 19:24:06 +0000
+++ b/extras/entitycheck.py 2016-12-31 02:05:54 +0000
@@ -20,10 +20,10 @@
-e FILE, --entityfile=FILE -- use entity file FILE [%s]
-p PATH, --packagepath=PATH -- use PATH for packages [%s]
-d PATH, --patchpath=PATH -- use PATH for patches [%s]
--g, --gensum -- generate md5 sums for found entities
--c, --checksum -- check md5 sums against file
+-g, --gensum -- generate crypto hashes for found entities
+-c, --checksum -- check crypto hashes against file
--delete-bad -- delete files with bad checksums
--s FILE, --sumfile=FILE -- use md5sum file FILE [%s]
+-s FILE, --sumfile=FILE -- use crypto hash file FILE [%s]
-f, --fetch -- fetch missing pkgs
--dryrun -- test what pkgs would be retrieved
--show-extras -- report on unused pkgs/patches
@@ -79,7 +79,7 @@
if msg:
print "ERROR:", msg
print
- print __doc__ % (epaths['entity_file'], epaths['package_path'],
+ print __doc__ % (epaths['entity_file'], epaths['package_path'],
epaths['patch_path'], epaths['md5sum_file'],
epaths['update_file'], epaths['fallback_url'])
sys.exit(code)
@@ -88,33 +88,34 @@
class Entity(object):
"""Entity class instantiated for each entity read from the entity file."""
- def __init__(self, name, file):
+ BLOCKSIZE = 1024 * 1024
+
+ def __init__(self, name, fname):
self.name = name
- self.file = file
+ self.fname = fname
self.fullpath = ''
+ self.hash = None
@classmethod
def from_re_match(cls, match):
"""Instantiate an Entity instance using a match object.
- When reading from the entities file, regular expression matching
+ When reading from the entities file, regular expression matching
is done to filter. This method takes the match object and unpacks
it to pass on to the class constructor.
"""
- name, file = match.groups()
- return cls(name, file)
+ name, fname = match.groups()
+ return cls(name, fname)
def __repr__(self):
- return "Entity('%s', '%s')" % (self.name, self.file)
+ return "Entity('%s', '%s')" % (self.name, self.fname)
def __str__(self):
"""Entity class "string" is "\tfilename", as that's what some things want to print"""
- return '\t%s' % self.file
-
- BLOCKSIZE = 1024 * 1024
-
- def domd5(self):
- """Generate and store md5sum for this entity's filename"""
+ return '\t%s' % self.fname
+
+ def dohash(self):
+ """Generate and store hash for this entity's filename"""
f = open(self.fullpath, "rb")
cksum = hashlib.md5()
while 1:
@@ -123,8 +124,7 @@
break
cksum.update(block)
f.close()
- s = cksum.digest()
- self.md5sum = "%02x" * len(s) % tuple(map(ord, s))
+ self.hash = cksum.hexdigest()
def delete_file(self):
"""Delete this entity's file, if it exists"""
@@ -174,17 +174,17 @@
find the package. A 'fallback' url is tried if they fail.
"""
self.message = "not found"
- self.front = "%s:" % self.file
- for loc in locations:
- if self.name != loc.name:
+ self.front = "%s:" % self.fname
+ for name, path, alternate in locations:
+ if self.name != name:
continue
- pkgpath = "%s/%s" % (destination, self.file)
+ pkgpath = "%s/%s" % (destination, self.fname)
print self.front,
sys.stdout.flush() # force text to be displayed
if dry_run: # just print a message and bail
self.message = "fetch %s from (%s, ...) to %s" % \
- (self.file, loc.path, destination)
+ (self.fname, path, destination)
self.running_output()
print # line break so previous message isn't overwritten
self.message = "skipped"
@@ -193,13 +193,13 @@
# try up to three times to fetch the file
numtries = 1
if prefer_fallback:
- urls = [fallback, loc.path, loc.alternate]
+ urls = [fallback, path, alternate]
else:
- urls = [loc.path, loc.alternate, fallback]
+ urls = [path, alternate, fallback]
for url in urls:
if not url:
continue # skip alternate if not defined
- to_get = "%s/%s" % (url, self.file)
+ to_get = "%s/%s" % (url, self.fname)
try:
self.start = time.time()
@@ -239,14 +239,14 @@
find the package. A 'fallback' url is tried if they fail.
"""
self.message = "not found"
- for loc in locations:
- if self.name != loc.name:
+ for name, path, alternate in locations:
+ if self.name != name:
continue
- pkgpath = "%s/%s" % (destination, self.file)
+ pkgpath = "%s/%s" % (destination, self.fname)
if dry_run: # just print a message and bail
print "fetch %s from (%s, ...) to %s" % \
- (self.file, loc.path, destination)
+ (self.fname, path, destination)
self.message = "skipped"
break
@@ -254,13 +254,13 @@
dir_ = os.getcwd()
os.chdir(destination)
if prefer_fallback:
- urls = [fallback, loc.path, loc.alternate]
+ urls = [fallback, path, alternate]
else:
- urls = [loc.path, loc.alternate, fallback]
+ urls = [path, alternate, fallback]
for url in urls:
if not url:
continue # skip alternate if not defined
- to_get = url + os.sep + self.file
+ to_get = url + os.sep + self.fname
try:
handle = os.popen("wget -c " + to_get)
if not handle.close():
@@ -281,10 +281,12 @@
def parse_packages():
- """Look for package files in the entities file. desired lines look like:
- <!ENTITY foo-package "foo-1.2.tar.bz2">, but we need to skip XML comments.
- Returns a match object"""
+ """Look for package files in the entities file.
+ Desired lines look like the following, but need to skip XML comments:
+ <!ENTITY foo-package "foo-1.2.tar.bz2">
+ Returns a match object.
+ """
try:
entities = open(epaths['entity_file'])
except IOError, message:
@@ -298,10 +300,12 @@
def parse_patches():
- """Look for package files in the entities file. desired lines look like:
- <!ENTITY foo-patch "foo-1.2.patch">, but we need to skip XML comments.
- Returns a match object"""
+ """Look for patch files in the entities file.
+ Desired lines look like the following, but need to skip XML comments:
+ <!ENTITY foo-patch "foo-1.2.patch">
+ Returns a match object.
+ """
try:
entities = open(epaths['entity_file'])
except IOError, message:
@@ -315,12 +319,13 @@
def parse_entities():
- """Extract package entities and patch entitie from the entity files
+ """Extract package and patch entities from the entity files.
+
Returns a tuple containing a list of each.
"""
- # hack: we're not parsing the xml, just having the parse functions
- # apply a regular expression match, which we hope we can write adequately
- # cue stock regular expressions joke ("now you have two problems")
+ # Hack: we're not parsing the xml, just having the parse functions
+ # apply a regular expression match, which we hope we can write adequately.
+ # Cue stock regular expressions joke ("now you have two problems").
packages = [
Entity.from_re_match(match) for match in parse_packages() if match
@@ -332,34 +337,16 @@
return (packages, patches)
-class Location(object):
- """location instances are created for each line in the locations file"""
-
- def __init__(self, name, path, alternate=None):
- self.name = name
- self.path = path
- self.alternate = alternate
-
- def __repr__(self):
- return 'location(%s, %s, %s)' % (self.name, self.path,
- self.alternate)
-
- def __str__(self):
- return 'Locations: %s, %s, %s' % (self.name, self.path,
- self.alternate)
-
-
def parse_locations():
"""Parse the locations file.
- Returns a list of Location instances."""
- #
- # The locations file contains lines of the form:
- # pkgname url alternate_url // comment
- #
+ The locations file contains lines of the form:
+ pkgname url alternate_url // comment
+ Returns a list of location tuples after some adjustment.
+ """
# alternate_url is used in cases where the package may not stay
- # in one place over time (e.g., if it moves to an "old" directory
- # when a new version is released)
+ # in one upstream place over time (e.g., if it moves to an "old"
+ # directory when a new version is released) - if we guess the pattern!
try:
package_file = open(epaths['update_file'])
@@ -370,14 +357,14 @@
for line in package_file.readlines():
if line[0] == '#':
continue # '#' is used for an initial comment
- bits = string.split(line)
+ bits = line.split()
if len(bits) < 3 or bits[1] == "none" or bits[1] == "None":
# Skip malformed lines:
# not enough fields, or with no retrieve location
continue
if bits[2] == "none" or bits[2] == "None":
bits[2] = None
- locations.append(Location(bits[0], bits[1], bits[2]))
+ locations.append((bits[0], bits[1], bits[2]))
package_file.close()
return locations
@@ -392,13 +379,13 @@
missing = []
for item in collection:
# save fullpath in the entity instance since we'll use it again
- item.fullpath = os.path.join(path, item.file)
+ item.fullpath = os.path.join(path, item.fname)
if not os.path.exists(item.fullpath):
missing.append(item)
else:
found.append(item)
if check_sums or generate_sums:
- item.domd5()
+ item.dohash()
return found, missing
@@ -408,9 +395,7 @@
Creates an entity instance for each and returns a list (this
is to be able to use a common print routine, only the names matter)
"""
- paths = dict((item.file, item) for item in collection)
- # if we require Python >= 2.7, can use dict comprehension:
- # paths = {item.file:item for item in collection}
+ paths = dict((item.fname, item) for item in collection)
notfound = [
Entity(None, filename) for filename in os.listdir(path)
if filename not in paths
@@ -426,9 +411,9 @@
"""
badsums = [
entity for entity in collection
- if entity.file in checksums and entity.md5sum != checksums[entity.file]
+ if entity.fname in checksums and entity.hash != checksums[entity.fname]
]
- nosums = [entity for entity in collection if entity.file not in checksums]
+ nosums = [entity for entity in collection if entity.fname not in checksums]
return badsums, nosums
@@ -499,7 +484,7 @@
return 0
-def readmd5():
+def readhash():
"""Read and parse a checksum file.
Returns a dictionary of sums indexed by filename.
"""
@@ -510,19 +495,19 @@
checksums = {}
for line in sums.readlines():
- (cksum, name) = string.split(line)
+ (cksum, name) = line.split()
checksums[name] = cksum
sums.close()
return checksums
-def writemd5(collection):
+def writehash(collection):
"""Generate a new checksum file from checksums saved in entities. """
sums = open(epaths['md5sum_file'], 'w')
if noisy:
- print "writing checksums to", epaths['md5sum_file']
+ print "writing checksums to", epaths['md5sum_file']
for entity in collection:
- sums.write("%s %s\n" % (entity.md5sum, entity.file))
+ sums.write("%s %s\n" % (entity.hash, entity.fname))
sums.close()
@@ -531,7 +516,6 @@
locations = parse_locations()
retrieved = 0
fails, missing = [], []
- global initial_text
if os.access(epaths['package_path'], os.W_OK):
print "Retrieving..."
for pkg in missing_packages:
@@ -570,7 +554,7 @@
"""delete any file with a bad checksum"""
if collection:
for item in collection:
- print "Deleting " + item.file
+ print "Deleting " + item.fname
item.delete_file()
@@ -652,16 +636,15 @@
# 5. check checksums, if requested
# Whether doing sums or not, generate a report of the work to date
+# so do that work first - it makes the ordering a little more sane
+
+exitcode = report(found_packages, found_patches, missing_packages,
+ missing_patches, extras)
+
if check_sums:
- checksums = readmd5()
+ checksums = readhash()
bad_packages, no_packages = check_checksums(found_packages, checksums)
bad_patches, no_patches = check_checksums(found_patches, checksums)
-
-# tell us what happened
-exitcode = report(found_packages, found_patches, missing_packages,
- missing_patches, extras)
-
-if check_sums:
exitcode += sum_report(bad_packages, bad_patches, no_packages, no_patches)
# 6. Go fetch missing files if requested, and do another report
@@ -683,6 +666,8 @@
exitcode = fetch_report(retrieved, fails, missing)
# 7. Delete files with bad checksums if requested.
+# TODO: this looks dubious, what if we found bad files, fetched them,
+# and now the original list isn't all bad?
if check_sums and delete_bad:
delete_bad_checksums(bad_packages + bad_patches)
@@ -696,6 +681,6 @@
# 9. Generate a new checksum file, if requested (only if nothing fatal
# happened above)
if generate_sums:
- writemd5(found_packages + found_patches)
+ writehash(found_packages + found_patches)
sys.exit(exitcode)
More information about the lsb-messages mailing list