"""
Compute pairwise distances between HIS ND1 atoms in a folder of PDB files.

It works on gzipped PDB files, so the PDB mirror folder can be kept compressed.
All PDB files must be in the same folder (set1); the script does not traverse
subdirectories. It is limited to roughly 30,000 files per folder, so the
~72,000 files mirrored from the PDB were split into three folders (set1, set2
and set3), each producing its own output file. The outputs can then be merged
into a single file with cat.
"""
import glob, os, pymol, sys
from pymol import cmd
from chempy import cpv
the_pdb="/Users/cale/pdb/set1"
files = glob.glob(the_pdb+os.sep+"*.ent.gz")
if not len(files):
print "Please set 'the_pdb' variable to a valid path containing PDB files."
sys.exit(1)
else:
print "Processing %d files." % len(files)
s, outFile = "resn HIS and name ND1", "dist_set1.csv"
f = open(outFile, 'wb')
# write the header
f.write("PDB\tCHAIN\tRESI\tATOM-A\tCHAIN\tRESI\tATOM-B\tDISTANCE\n")
# for each file in the mirror
for x in files:
cmd.load(x,finish=1)
n = cmd.get_names()[0]
m = cmd.get_model(s).atom
# pairwise for each atom
for aa in m:
for bb in m:
# avoid distances to self
if aa==bb: continue
# avoid duplicates
if aa>bb: continue
distance = cpv.distance(aa.coord, bb.coord)
# don't list if distance is above 10 angstroms
# if distance > 10 : continue
f.write( "%s\t%s\t%s\t%s\t%s\t%s\t%d\t%f\n" %
(n, aa.chain, aa.resi, aa.index,
bb.chain, bb.resi, bb.index,
distance))
cmd.delete(n)
f.close()
print "Processed %d files. Please see %s for results." % (len(files), outFile)