Index: dev-tools/scripts/smokeTestRelease.py
===================================================================
--- dev-tools/scripts/smokeTestRelease.py	(revision 1220577)
+++ dev-tools/scripts/smokeTestRelease.py	(working copy)
@@ -22,6 +22,9 @@
 import urlparse
 import sys
 import HTMLParser
+from collections import defaultdict
+import libxml2
+import filecmp
 
 # This tool expects to find /lucene and /solr off the base URL.  You
 # must have a working gpg, tar, unzip in your path.  This has only
@@ -71,10 +74,10 @@
     links.append((text, fullURL))
   return links
 
-def download(name, urlString, tmpDir):
+def download(name, urlString, tmpDir, quiet=False):
   fileName = '%s/%s' % (tmpDir, name)
   if DEBUG and os.path.exists(fileName):
-    if fileName.find('.asc') == -1:
+    if not quiet and fileName.find('.asc') == -1:
       print '    already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
     return
   fIn = urllib2.urlopen(urlString)
@@ -94,7 +97,7 @@
     fOut.close()
     if not success:
       os.remove(fileName)
-  if fileName.find('.asc') == -1:
+  if not quiet and fileName.find('.asc') == -1:
     print '    %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
     
 def load(urlString):
@@ -291,11 +294,11 @@
     raise RuntimeError('MD5 digest mismatch for %s: expected %s but got %s' % (artifact, md5Expected, md5Actual))
   if sha1Actual != sha1Expected:
     raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))
-  
+
 def getDirEntries(urlString):
   links = getHREFs(urlString)
   for i, (text, subURL) in enumerate(links):
-    if text == 'Parent Directory':
+    if text == 'Parent Directory' or text == '..':
       return links[(i+1):]
 
 def unpack(project, tmpDir, artifact, version):
@@ -440,7 +443,350 @@
     if numHits < 100:
       raise RuntimeError('lucene demo\'s SearchFiles found too few results: %s' % numHits)
     print '      got %d hits for query "lucene"' % numHits
-        
+
+
+def checkMaven(baseURL, tmpDir, version):
+  # Locate the release branch in subversion
+  m = re.match('(\d+)\.(\d+)', version) # Get Major.minor version components
+  releaseBranchText = 'lucene_solr_%s_%s/' % (m.group(1), m.group(2))
+  branchesURL = 'http://svn.apache.org/repos/asf/lucene/dev/branches/'
+  releaseBranchSvnURL = None
+  branches = getDirEntries(branchesURL)
+  for text, subURL in branches:
+    if text == releaseBranchText:
+      releaseBranchSvnURL = subURL
+  if releaseBranchSvnURL is None:
+    raise RuntimeError('Release branch %s%s not found' % (branchesURL, releaseBranchText))
+
+  print '    download POM templates',
+  POMtemplates = defaultdict()
+  getPOMtemplates(POMtemplates, tmpDir, releaseBranchSvnURL)
+  print
+  print '    download artifacts',
+  artifacts = {'lucene': [], 'solr': []}
+  for project in ('lucene', 'solr'):
+    artifactsURL = '%s/%s/maven/org/apache/%s' % (baseURL, project, project)
+    targetDir = '%s/maven/org/apache/%s' % (tmpDir, project)
+    if not os.path.exists(targetDir):
+      os.makedirs(targetDir)
+    crawl(artifacts[project], artifactsURL, targetDir)
+  print
+  print '    verify that each binary artifact has a deployed POM...'
+  verifyPOMperBinaryArtifact(artifacts, version)
+  print '    verify that there is an artifact for each POM template...'
+  verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version)
+  print "    verify Maven artifacts' md5/sha1 digests..."
+  verifyMavenDigests(artifacts)
+  print '    verify that all non-Mavenized deps are deployed...'
+  nonMavenizedDeps = dict()
+  checkNonMavenizedDeps(nonMavenizedDeps, POMtemplates, artifacts, tmpDir, version, releaseBranchSvnURL)
+  print '    check for javadoc and sources artifacts...'
+  checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version)
+  print "    verify deployed POMs' coordinates..."
+  verifyDeployedPOMsCoordinates(artifacts, version)
+  print '    verify maven artifact sigs',
+  verifyMavenSigs(baseURL, tmpDir, artifacts)
+
+  distributionFiles = getDistributionsForMavenChecks(tmpDir, version, baseURL)
+
+  print '    verify that non-Mavenized deps are the same as those in the binary distribution...'
+  checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps)
+  print '    verify that Maven artifacts are the same as those the binary distribution...'
+  checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version)
+
+
+def getDistributionsForMavenChecks(tmpDir, version, baseURL):
+  distributionFiles = defaultdict()
+  for project in ('lucene', 'solr'):
+    distribution = '%s-%s.tgz' % (project, version)
+    if project == 'solr': distribution = 'apache-' + distribution
+    if not os.path.exists('%s/%s' % (tmpDir, distribution)):
+      distURL = '%s/%s/%s' % (baseURL, project, distribution)
+      print '    download %s...' % distribution,
+      download(distribution, distURL, tmpDir)
+    destDir = '%s/unpack-%s-maven' % (tmpDir, project)
+    if os.path.exists(destDir):
+      shutil.rmtree(destDir)
+    os.makedirs(destDir)
+    os.chdir(destDir)
+    print '    unpack %s...' % distribution
+    unpackLogFile = '%s/unpack-%s-maven-checks.log' % (tmpDir, distribution)
+    run('tar xzf %s/%s' % (tmpDir, distribution), unpackLogFile)
+    if project == 'solr': # unpack the Solr war
+      unpackLogFile = '%s/unpack-solr-war-maven-checks.log' % tmpDir
+      print '        unpack Solr war...'
+      run('jar xvf */dist/*.war', unpackLogFile)
+    distributionFiles[project] = []
+    for root, dirs, files in os.walk(destDir):
+      distributionFiles[project].extend([os.path.join(root, file) for file in files])
+  return distributionFiles
+
+def checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version):
+  for project in ('lucene', 'solr'):
+    for artifact in artifacts[project]:
+      if artifact.endswith(version + '.jar') and artifact not in nonMavenizedDeps.keys():
+        javadocJar = artifact[:-4] + '-javadoc.jar'
+        if javadocJar not in artifacts[project]:
+          raise RuntimeError('missing: %s' % javadocJar)
+        sourcesJar = artifact[:-4] + '-sources.jar'
+        if sourcesJar not in artifacts[project]:
+          raise RuntimeError('missing: %s' % sourcesJar)
+
+def checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps):
+  for project in ('lucene', 'solr'):
+    distFilenames = dict()
+    for file in distributionFiles[project]:
+      distFilenames[os.path.basename(file)] = file
+    for dep in nonMavenizedDeps.keys():
+      if ('/%s/' % project) in dep:
+        depOrigFilename = os.path.basename(nonMavenizedDeps[dep])
+        if not depOrigFilename in distFilenames:
+          raise RuntimeError('missing: non-mavenized dependency %s' % nonMavenizedDeps[dep])
+        identical = filecmp.cmp(dep, distFilenames[depOrigFilename], shallow=False)
+        if not identical:
+          raise RuntimeError('Deployed non-mavenized dep %s differs from distribution dep %s'
+                            % (dep, distFilenames[depOrigFilename]))
+
+def checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version):
+  reJarWar = re.compile(r'\.[wj]ar$')
+  for project in ('lucene', 'solr'):
+    distFilenames = dict()
+    for file in distributionFiles[project]:
+      distFilenames[os.path.basename(file)] = file
+    for artifact in artifacts[project]:
+      if reJarWarPom.search(artifact):
+        if artifact not in nonMavenizedDeps.keys():
+          artifactFilename = os.path.basename(artifact)
+          if artifactFilename not in distFilenames.keys():
+            raise RuntimeError('Maven artifact %s is not present in %s binary distribution'
+                              % (artifact, project))
+          identical = filecmp.cmp(artifact, distFilenames[artifactFilename], shallow=False)
+          if not identical:
+            raise RuntimeError('Maven artifact %s is not identical to %s in %s binary distribution'
+                              % (artifact, distFilenames[artifactFilename], project))
+
+def verifyMavenDigests(artifacts):
+  reJarWarPom = re.compile(r'\.(?:[wj]ar|pom)$')
+  for project in ('lucene', 'solr'):
+    for artifactFile in [a for a in artifacts[project] if reJarWarPom.search(a)]:
+      if artifactFile + '.md5' not in artifacts[project]:
+        raise RuntimeError('missing: MD5 digest for %s' % artifactFile)
+      if artifactFile + '.sha1' not in artifacts[project]:
+        raise RuntimeError('missing: SHA1 digest for %s' % artifactFile)
+      with open(artifactFile + '.md5', 'r') as md5File:
+        md5Expected = md5File.read().strip()
+      with open(artifactFile + '.sha1', 'r') as sha1File:
+        sha1Expected = sha1File.read().strip()
+      md5 = hashlib.md5()
+      sha1 = hashlib.sha1()
+      inputFile = open(artifactFile)
+      while True:
+        bytes = inputFile.read(65536)
+        if bytes == '': break
+        md5.update(bytes)
+        sha1.update(bytes)
+      inputFile.close()
+      md5Actual = md5.hexdigest()
+      sha1Actual = sha1.hexdigest()
+      if md5Actual != md5Expected:
+        raise RuntimeError('MD5 digest mismatch for %s: expected %s but got %s'
+                           % (artifactFile, md5Expected, md5Actual))
+      if sha1Actual != sha1Expected:
+        raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s'
+                           % (artifactFile, sha1Expected, sha1Actual))
+
+def checkNonMavenizedDeps(nonMavenizedDependencies, POMtemplates, artifacts, tmpDir, version, releaseBranchSvnURL):
+  """
+  - check for non-mavenized dependencies listed in the grandfather POM template
+  - nonMavenizedDependencies is populated with a map from non-mavenized dependency
+    artifact path to the original jar path
+  """
+  xpathInstallPluginConfig = '/p:project/p:profiles/p:profile[p:id="bootstrap"]/p:build/p:plugins'\
+                           + '/p:plugin[p:artifactId="maven-install-plugin"]/p:executions/p:execution/p:configuration'
+  xpathPOMDir = '/p:project/p:profiles/p:profile[p:id="bootstrap"]/p:build/p:plugins'\
+              + '/p:plugin[p:artifactId="maven-resources-plugin"]/p:executions/p:execution/p:configuration'\
+              + '/p:resources/p:resource/p:directory'
+  with open(POMtemplates['grandfather'][0], 'r') as f:
+    doc = libxml2.parseDoc(f.read())
+  context = doc.xpathNewContext()
+  context.xpathRegisterNs('p', 'http://maven.apache.org/POM/4.0.0')
+  for dep in context.xpathEval(xpathInstallPluginConfig):
+    child = dep.children
+    groupId, artifactId, file, pomFile = '', '', '', ''
+    while child is not None:
+      if child.type == "element":
+        if child.name == 'groupId':
+          groupId = 'org.apache.lucene' if child.content == r'${project.groupId}' else child.content
+        elif child.name == 'artifactId': artifactId = child.content
+        elif child.name == 'file': file = child.content
+        elif child.name == 'pomFile': pomFile = child.content
+      child = child.next
+    if groupId in ('org.apache.lucene', 'org.apache.solr'):
+      depJar = '%s/maven/%s/%s/%s/%s-%s.jar' \
+             % (tmpDir, groupId.replace('.', '/'), artifactId, version, artifactId, version)
+      if depJar not in artifacts['lucene'] and depJar not in artifacts['solr']:
+        raise RuntimeError('Missing non-mavenized dependency %s' % depJar)
+      nonMavenizedDependencies[depJar] = file
+    elif pomFile:
+      # Find non-Mavenized deps with associated POMs
+      pomFile = pomFile.split('/')[-1] # remove path
+      for pomDir in context.xpathEval(xpathPOMDir):
+        entries = getDirEntries('%s/%s' % (releaseBranchSvnURL, pomDir.content.strip()))
+        for text, subURL in entries:
+          if text == pomFile:
+            doc2 = libxml2.parseDoc(load(subURL))
+            groupId2, artifactId2, packaging2, POMversion = getPOMcoordinate(doc2)
+            depJar = '%s/maven/%s/%s/%s/%s-%s.jar' \
+                   % (tmpDir, groupId2.replace('.', '/'), artifactId2, version, artifactId2, version)
+            if depJar not in artifacts['lucene'] and depJar not in artifacts['solr']:
+              raise RuntimeError('Missing non-mavenized dependency %s' % depJar)
+            nonMavenizedDependencies[depJar] = file
+
+def getPOMcoordinate(doc):
+  context = doc.xpathNewContext()
+  context.xpathRegisterNs('p', 'http://maven.apache.org/POM/4.0.0')
+  groupId = context.xpathEval('/p:project/p:groupId')
+  if not groupId:
+    groupId = context.xpathEval('/p:project/p:parent/p:groupId')
+  groupId = groupId[0].content
+  artifactId = context.xpathEval('/p:project/p:artifactId')[0].content
+  version = context.xpathEval('/p:project/p:version')
+  if not version:
+    version = context.xpathEval('/p:project/p:parent/p:version')
+  version = version[0].content
+  packaging = context.xpathEval('/p:project/p:packaging')
+  packaging = 'jar' if not packaging else packaging[0].content
+  return groupId, artifactId, packaging, version
+
+
+def verifyMavenSigs(baseURL, tmpDir, artifacts):
+  """Verify Maven artifact signatures"""
+  for project in ('lucene', 'solr'):
+    keysFile = '%s/%s.KEYS' % (tmpDir, project)
+    if not os.path.exists(keysFile):
+      keysURL = '%s/%s/KEYS' % (baseURL, project)
+      download('%s.KEYS' % project, keysURL, tmpDir, quiet=True)
+
+    # Set up clean gpg world; import keys file:
+    gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
+    if os.path.exists(gpgHomeDir):
+      shutil.rmtree(gpgHomeDir)
+    os.makedirs(gpgHomeDir, 0700)
+    run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
+        '%s/%s.gpg.import.log 2>&1' % (tmpDir, project))
+
+    reArtifacts = re.compile(r'\.(?:pom|[jw]ar)$')
+    for artifactFile in [a for a in artifacts[project] if reArtifacts.search(a)]:
+      artifact = os.path.basename(artifactFile)
+      sigFile = '%s.asc' % artifactFile
+      # Test sig (this is done with a clean brand-new GPG world)
+      logFile = '%s/%s.%s.gpg.verify.log' % (tmpDir, project, artifact)
+      run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
+          logFile)
+      # Forward any GPG warnings, except the expected one (since its a clean world)
+      f = open(logFile, 'rb')
+      for line in f.readlines():
+        if -1 != line.lower().find('warning') \
+           and -1 == line.find('WARNING: This key is not certified with a trusted signature') \
+           and -1 == line.find('WARNING: using insecure memory'):
+          print '      GPG: %s' % line.strip()
+      f.close()
+
+      # Test trust (this is done with the real users config)
+      run('gpg --import %s' % keysFile,
+          '%s/%s.gpg.trust.import.log 2>&1' % (tmpDir, project))
+      logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
+      run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
+      # Forward any GPG warnings:
+      f = open(logFile, 'rb')
+      for line in f.readlines():
+        if -1 != line.lower().find('warning') \
+           and -1 == line.find('WARNING: This key is not certified with a trusted signature') \
+           and -1 == line.find('WARNING: using insecure memory'):
+          print '      GPG: %s' % line.strip()
+      f.close()
+
+      sys.stdout.write('.')
+  print
+
+def verifyPOMperBinaryArtifact(artifacts, version):
+  """verify that each binary jar and war has a corresponding POM file"""
+  reBinaryJarWar = re.compile(r'%s\.[jw]ar$' % re.escape(version))
+  for project in ('lucene', 'solr'):
+    for artifact in [a for a in artifacts[project] if reBinaryJarWar.search(a)]:
+      POM = artifact[:-4] + '.pom'
+      if POM not in artifacts[project]:
+        raise RuntimeError('missing: POM for %s' % artifact)
+
+def verifyDeployedPOMsCoordinates(artifacts, version):
+  """
+  verify that each POM's coordinate (drawn from its content) matches
+  its filepath, and verify that the corresponding artifact exists.
+  """
+  for project in ('lucene', 'solr'):
+    for POM in [a for a in artifacts[project] if a.endswith('.pom')]:
+      with open(POM, 'r') as f: doc = libxml2.parseDoc(f.read())
+      groupId, artifactId, packaging, POMversion = getPOMcoordinate(doc)
+      POMpath = '%s/%s/%s/%s-%s.pom' \
+              % (groupId.replace('.', '/'), artifactId, version, artifactId, version)
+      if not POM.endswith(POMpath):
+        raise RuntimeError("Mismatch between POM coordinate %s:%s:%s and filepath: %s"
+                          % (groupId, artifactId, POMversion, POM))
+      # Verify that the corresponding artifact exists
+      artifact = POM[:-3] + packaging
+      if artifact not in artifacts[project]:
+        raise RuntimeError('Missing corresponding .%s artifact for POM %s' % (packaging, POM))
+
+def verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version):
+  """verify that each POM template's artifact is present in artifacts"""
+  for project in ('lucene', 'solr'):
+    for POMtemplate in POMtemplates[project]:
+      with open(POMtemplate, 'r') as f: doc = libxml2.parseDoc(f.read())
+      context = doc.xpathNewContext()
+      context.xpathRegisterNs('p', 'http://maven.apache.org/POM/4.0.0')
+      # Ignore POMs that won't be deployed (e.g. aggregation POMs)
+      skipDeploy = context.xpathEval(
+        '/p:project/p:build/p:plugins/p:plugin[p:artifactId="maven-deploy-plugin"]/p:configuration/p:skip')
+      if not skipDeploy or 'true' != skipDeploy[0].content.strip().lower():
+        groupId, artifactId, packaging, POMversion = getPOMcoordinate(doc)
+        # Ignore POMversion, since its value will not have been interpolated
+        artifact = '%s/maven/%s/%s/%s/%s-%s.%s' \
+                 % (tmpDir, groupId.replace('.', '/'), artifactId, version, artifactId, version, packaging)
+        if artifact not in artifacts['lucene'] and artifact not in artifacts['solr']:
+          raise RuntimeError('Missing artifact %s' % artifact)
+
+def getPOMtemplates(POMtemplates, tmpDir, releaseBranchSvnURL):
+  releaseBranchSvnURL += 'dev-tools/maven/'
+  targetDir = '%s/dev-tools/maven' % tmpDir
+  if not os.path.exists(targetDir):
+    os.makedirs(targetDir)
+  allPOMtemplates = []
+  crawl(allPOMtemplates, releaseBranchSvnURL, targetDir, set(['Apache Subversion'])) # Ignore "Apache Subversion" links
+  POMtemplates['lucene'] = [p for p in allPOMtemplates if '/lucene/' in p]
+  if POMtemplates['lucene'] is None:
+    raise RuntimeError('No Lucene POMs found at %s' % releaseBranchSvnURL)
+  POMtemplates['solr'] = [p for p in allPOMtemplates if '/solr/' in p]
+  if POMtemplates['solr'] is None:
+    raise RuntimeError('No Solr POMs found at %s' % releaseBranchSvnURL)
+  POMtemplates['grandfather'] = [p for p in allPOMtemplates if '/maven/pom.xml.template' in p]
+  if POMtemplates['grandfather'] is None:
+    raise RuntimeError('No Lucene/Solr grandfather POM found at %s' % releaseBranchSvnURL)
+
+
+def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
+  for text, subURL in getDirEntries(urlString):
+    if text not in exclusions:
+      path = os.path.join(targetDir, text)
+      if text.endswith('/'):
+        if not os.path.exists(path):
+          os.makedirs(path)
+        crawl(downloadedFiles, subURL, path, exclusions)
+      else:
+        if not os.path.exists(path) or not DEBUG:
+          download(text, subURL, targetDir, quiet=True)
+        downloadedFiles.append(path)
+        sys.stdout.write('.')
+
 def main():
 
   if len(sys.argv) != 4:
@@ -486,6 +832,9 @@
     unpack('solr', tmpDir, artifact, version)
   unpack('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)
 
+  print 'Test Maven artifacts for Lucene and Solr...'
+  checkMaven(baseURL, tmpDir, version)
+
 if __name__ == '__main__':
   main()
   
