[Lsb-messages] /var/www/bzr/lsb/devel/appbat r1009: assorted entity checker tweaks

Mats Wichmann mats at linuxfoundation.org
Sat Dec 31 02:05:54 UTC 2016


------------------------------------------------------------
revno: 1009
committer: Mats Wichmann <mats at linuxfoundation.org>
branch nick: appbat
timestamp: Fri 2016-12-30 19:05:54 -0700
message:
  assorted entity checker tweaks
modified:
  extras/entitycheck.py
-------------- next part --------------
=== modified file 'extras/entitycheck.py'
--- a/extras/entitycheck.py	2016-12-30 19:24:06 +0000
+++ b/extras/entitycheck.py	2016-12-31 02:05:54 +0000
@@ -20,10 +20,10 @@
 -e FILE, --entityfile=FILE    -- use entity file FILE [%s]
 -p PATH, --packagepath=PATH   -- use PATH for packages [%s]
 -d PATH, --patchpath=PATH     -- use PATH for patches [%s]
--g, --gensum                  -- generate md5 sums for found entities
--c, --checksum                -- check md5 sums against file
+-g, --gensum                  -- generate crypto hashes for found entities
+-c, --checksum                -- check crypto hashes against file
 --delete-bad                  -- delete files with bad checksums
--s FILE, --sumfile=FILE       -- use md5sum file FILE [%s]
+-s FILE, --sumfile=FILE       -- use crypto hash file FILE [%s]
 -f, --fetch                   -- fetch missing pkgs
 --dryrun                      -- test what pkgs would be retrieved
 --show-extras                 -- report on unused pkgs/patches
@@ -79,7 +79,7 @@
     if msg:
         print "ERROR:", msg
         print
-    print __doc__ % (epaths['entity_file'], epaths['package_path'], 
+    print __doc__ % (epaths['entity_file'], epaths['package_path'],
                      epaths['patch_path'], epaths['md5sum_file'],
                      epaths['update_file'], epaths['fallback_url'])
     sys.exit(code)
@@ -88,33 +88,34 @@
 class Entity(object):
     """Entity class instantiated for each entity read from the entity file."""
 
-    def __init__(self, name, file):
+    BLOCKSIZE = 1024 * 1024
+
+    def __init__(self, name, fname):
         self.name = name
-        self.file = file
+        self.fname = fname
         self.fullpath = ''
+        self.hash = None
 
     @classmethod
     def from_re_match(cls, match):
         """Instantiate an Entity instance using a match object.
 
-        When reading from the entities file, regular expression matching 
+        When reading from the entities file, regular expression matching
         is done to filter. This method takes the match object and unpacks
         it to pass on to the class constructor.
         """
-        name, file = match.groups()
-        return cls(name, file)
+        name, fname = match.groups()
+        return cls(name, fname)
 
     def __repr__(self):
-        return "Entity('%s', '%s')" % (self.name, self.file)
+        return "Entity('%s', '%s')" % (self.name, self.fname)
 
     def __str__(self):
         """Entity class "string" is "\tfilename", as that's what some things want to print"""
-        return '\t%s' % self.file
-
-    BLOCKSIZE = 1024 * 1024
-
-    def domd5(self):
-        """Generate and store md5sum for this entity's filename"""
+        return '\t%s' % self.fname
+
+    def dohash(self):
+        """Generate and store hash for this entity's filename"""
         f = open(self.fullpath, "rb")
         cksum = hashlib.md5()
         while 1:
@@ -123,8 +124,7 @@
                 break
             cksum.update(block)
         f.close()
-        s = cksum.digest()
-        self.md5sum = "%02x" * len(s) % tuple(map(ord, s))
+        self.hash = cksum.hexdigest()
 
     def delete_file(self):
         """Delete this entity's file, if it exists"""
@@ -174,17 +174,17 @@
         find the package. A 'fallback' url is tried if they fail.
         """
         self.message = "not found"
-        self.front = "%s:" % self.file
-        for loc in locations:
-            if self.name != loc.name:
+        self.front = "%s:" % self.fname
+        for name, path, alternate in locations:
+            if self.name != name:
                 continue
-            pkgpath = "%s/%s" % (destination, self.file)
+            pkgpath = "%s/%s" % (destination, self.fname)
             print self.front,
             sys.stdout.flush()  # force text to be displayed
 
             if dry_run:  # just print a message and bail
                 self.message = "fetch %s from (%s, ...) to %s" % \
-                               (self.file, loc.path, destination)
+                               (self.fname, path, destination)
                 self.running_output()
                 print  # line break so previous message isn't overwritten
                 self.message = "skipped"
@@ -193,13 +193,13 @@
             # try up to three times to fetch the file
             numtries = 1
             if prefer_fallback:
-                urls = [fallback, loc.path, loc.alternate]
+                urls = [fallback, path, alternate]
             else:
-                urls = [loc.path, loc.alternate, fallback]
+                urls = [path, alternate, fallback]
             for url in urls:
                 if not url:
                     continue  # skip alternate if not defined
-                to_get = "%s/%s" % (url, self.file)
+                to_get = "%s/%s" % (url, self.fname)
 
                 try:
                     self.start = time.time()
@@ -239,14 +239,14 @@
         find the package. A 'fallback' url is tried if they fail.
         """
         self.message = "not found"
-        for loc in locations:
-            if self.name != loc.name:
+        for name, path, alternate in locations:
+            if self.name != name:
                 continue
-            pkgpath = "%s/%s" % (destination, self.file)
+            pkgpath = "%s/%s" % (destination, self.fname)
 
             if dry_run:  # just print a message and bail
                 print "fetch %s from (%s, ...) to %s" % \
-                               (self.file, loc.path, destination)
+                               (self.fname, path, destination)
                 self.message = "skipped"
                 break
 
@@ -254,13 +254,13 @@
             dir_ = os.getcwd()
             os.chdir(destination)
             if prefer_fallback:
-                urls = [fallback, loc.path, loc.alternate]
+                urls = [fallback, path, alternate]
             else:
-                urls = [loc.path, loc.alternate, fallback]
+                urls = [path, alternate, fallback]
             for url in urls:
                 if not url:
                     continue  # skip alternate if not defined
-                to_get = url + os.sep + self.file
+                to_get = url + os.sep + self.fname
                 try:
                     handle = os.popen("wget -c " + to_get)
                     if not handle.close():
@@ -281,10 +281,12 @@
 
 
 def parse_packages():
-    """Look for package files in the entities file. desired lines look like:
-     <!ENTITY foo-package "foo-1.2.tar.bz2">, but we need to skip XML comments.
-     Returns a match object"""
+    """Look for package files in the entities file.
 
+    Desired lines look like the following, but need to skip XML comments:
+    <!ENTITY foo-package "foo-1.2.tar.bz2">
+    Returns a match object.
+    """
     try:
         entities = open(epaths['entity_file'])
     except IOError, message:
@@ -298,10 +300,12 @@
 
 
 def parse_patches():
-    """Look for package files in the entities file. desired lines look like:
-     <!ENTITY foo-patch "foo-1.2.patch">, but we need to skip XML comments.
-     Returns a match object"""
+    """Look for patch files in the entities file.
 
+    Desired lines look like the following, but need to skip XML comments:
+    <!ENTITY foo-patch "foo-1.2.patch">
+    Returns a match object.
+    """
     try:
         entities = open(epaths['entity_file'])
     except IOError, message:
@@ -315,12 +319,13 @@
 
 
 def parse_entities():
-    """Extract package entities and patch entitie from the entity files
+    """Extract package and patch entities from the entity files.
+
     Returns a tuple containing a list of each.
     """
-    # hack: we're not parsing the xml, just having the parse functions
-    # apply a regular expression match, which we hope we can write adequately
-    # cue stock regular expressions joke ("now you have two problems")
+    # Hack: we're not parsing the xml, just having the parse functions
+    # apply a regular expression match, which we hope we can write adequately.
+    # Cue stock regular expressions joke ("now you have two problems").
 
     packages = [
         Entity.from_re_match(match) for match in parse_packages() if match
@@ -332,34 +337,16 @@
     return (packages, patches)
 
 
-class Location(object):
-    """location instances are created for each line in the locations file"""
-
-    def __init__(self, name, path, alternate=None):
-        self.name = name
-        self.path = path
-        self.alternate = alternate
-
-    def __repr__(self):
-        return 'location(%s, %s, %s)' % (self.name, self.path,
-                                             self.alternate)
-
-    def __str__(self):
-        return 'Locations: %s, %s, %s' % (self.name, self.path,
-                                              self.alternate)
-
-
 def parse_locations():
     """Parse the locations file.
 
-    Returns a list of Location instances."""
-    #
-    # The locations file contains lines of the form:
-    #   pkgname url alternate_url // comment
-    #
+    The locations file contains lines of the form:
+       pkgname url alternate_url // comment
+    Returns a list of (pkgname, url, alternate_url) tuples; "none" alternates become None.
+    """
     # alternate_url is used in cases where the package may not stay
-    # in one place over time (e.g., if it moves to an "old" directory
-    # when a new version is released)
+    # in one upstream place over time (e.g., if it moves to an "old"
+    # directory when a new version is released) - if we guess the pattern!
 
     try:
         package_file = open(epaths['update_file'])
@@ -370,14 +357,14 @@
     for line in package_file.readlines():
         if line[0] == '#':
             continue  # '#' is used for an initial comment
-        bits = string.split(line)
+        bits = line.split()
         if len(bits) < 3 or bits[1] == "none" or bits[1] == "None":
             # Skip malformed lines:
             # not enough fields, or with no retrieve location
             continue
         if bits[2] == "none" or bits[2] == "None":
             bits[2] = None
-        locations.append(Location(bits[0], bits[1], bits[2]))
+        locations.append((bits[0], bits[1], bits[2]))
     package_file.close()
     return locations
 
@@ -392,13 +379,13 @@
     missing = []
     for item in collection:
         # save fullpath in the entity instance since we'll use it again
-        item.fullpath = os.path.join(path, item.file)
+        item.fullpath = os.path.join(path, item.fname)
         if not os.path.exists(item.fullpath):
             missing.append(item)
         else:
             found.append(item)
             if check_sums or generate_sums:
-                item.domd5()
+                item.dohash()
     return found, missing
 
 
@@ -408,9 +395,7 @@
     Creates an entity instance for each and returns a list (this
     is to be able to use a common print routine, only the names matter)
     """
-    paths = dict((item.file, item) for item in collection)
-    # if we require Python >= 2.7, can use dict comprehension:
-    # paths = {item.file:item for item in collection}
+    paths = dict((item.fname, item) for item in collection)
     notfound = [
         Entity(None, filename) for filename in os.listdir(path)
         if filename not in paths
@@ -426,9 +411,9 @@
     """
     badsums = [
         entity for entity in collection
-        if entity.file in checksums and entity.md5sum != checksums[entity.file]
+        if entity.fname in checksums and entity.hash != checksums[entity.fname]
     ]
-    nosums = [entity for entity in collection if entity.file not in checksums]
+    nosums = [entity for entity in collection if entity.fname not in checksums]
     return badsums, nosums
 
 
@@ -499,7 +484,7 @@
     return 0
 
 
-def readmd5():
+def readhash():
     """Read and parse a checksum file.
     Returns a dictionary of sums indexed by filename.
     """
@@ -510,19 +495,19 @@
 
     checksums = {}
     for line in sums.readlines():
-        (cksum, name) = string.split(line)
+        (cksum, name) = line.split()
         checksums[name] = cksum
     sums.close()
     return checksums
 
 
-def writemd5(collection):
+def writehash(collection):
     """Generate a new checksum file from checksums saved in entities. """
     sums = open(epaths['md5sum_file'], 'w')
     if noisy:
-        print "writing checksums to",  epaths['md5sum_file']
+        print "writing checksums to", epaths['md5sum_file']
     for entity in collection:
-        sums.write("%s  %s\n" % (entity.md5sum, entity.file))
+        sums.write("%s  %s\n" % (entity.hash, entity.fname))
     sums.close()
 
 
@@ -531,7 +516,6 @@
     locations = parse_locations()
     retrieved = 0
     fails, missing = [], []
-    global initial_text
     if os.access(epaths['package_path'], os.W_OK):
         print "Retrieving..."
         for pkg in missing_packages:
@@ -570,7 +554,7 @@
     """delete any file with a bad checksum"""
     if collection:
         for item in collection:
-            print "Deleting " + item.file
+            print "Deleting " + item.fname
             item.delete_file()
 
 
@@ -652,16 +636,15 @@
 
 # 5. check checksums, if requested
 # Whether doing sums or not, generate a report of the work to date
+# so do that work first - it makes the ordering a little more sane
+
+exitcode = report(found_packages, found_patches, missing_packages,
+                  missing_patches, extras)
+
 if check_sums:
-    checksums = readmd5()
+    checksums = readhash()
     bad_packages, no_packages = check_checksums(found_packages, checksums)
     bad_patches, no_patches = check_checksums(found_patches, checksums)
-
-# tell us what happened
-exitcode = report(found_packages, found_patches, missing_packages,
-                  missing_patches, extras)
-
-if check_sums:
     exitcode += sum_report(bad_packages, bad_patches, no_packages, no_patches)
 
 # 6. Go fetch missing files if requested, and do another report
@@ -683,6 +666,8 @@
     exitcode = fetch_report(retrieved, fails, missing)
 
 # 7. Delete files with bad checksums if requested.
+# TODO: this looks dubious, what if we found bad files, fetched them,
+# and now the original list isn't all bad?
 if check_sums and delete_bad:
     delete_bad_checksums(bad_packages + bad_patches)
 
@@ -696,6 +681,6 @@
 # 9. Generate a new checksum file, if requested (only if nothing fatal
 # happened above)
 if generate_sums:
-    writemd5(found_packages + found_patches)
+    writehash(found_packages + found_patches)
 
 sys.exit(exitcode)



More information about the lsb-messages mailing list