# DHH20080609-02

# Info
# Search:
# get-date-range.py
# old bailey
#
# given a directory of trial files, return
# an ordered list of trial dates

import os
import re

# given a directory string, return a list of file names
def getFileNames(dirstr):
    """Return the names of the entries in directory ``dirstr``.

    The listing is read with ``os.listdir`` instead of shelling out to the
    Windows ``dir /B`` command, so it works on any platform, handles
    directory names containing spaces or shell metacharacters, and never
    passes ``dirstr`` through a shell.

    Args:
        dirstr: path of the directory to list.

    Returns:
        A list of bare entry names (no directory prefix), sorted so the
        result is the same from run to run.

    Raises:
        OSError: if ``dirstr`` does not exist or cannot be read.
    """
    # os.listdir returns entries in arbitrary order; sort for determinism.
    return sorted(os.listdir(dirstr))

# get a list of trial files to process
indirname = 'Mined_1830s'
triallist = getFileNames(indirname)

# strip date out of pre- and post-fixed material
# (the dictionary keys act as a set: each distinct 8-digit string is kept
# once, however many file names contain it)
datedict = {}
datepattern = re.compile(r'\d{8}', re.UNICODE)
for tr in triallist:
    matchdate = datepattern.search(tr)
    # search() returns None for a name with no 8-digit run; skip such
    # entries instead of failing on matchdate.group()
    if matchdate is None:
        continue
    datedict[matchdate.group()] = '1'

# output file consisting of list of dates, one per line, in sorted
# (lexicographic) order
# sorted() is used because dict.keys() is not a sortable list on Python 3
keys = sorted(datedict)
outfilename = 'dates-1830s.txt'
# the with-block closes the file even if a write fails
with open(outfilename, 'w') as f:
    for k in keys:
        f.write(str(k) + '\n')
# This is a Wiki Spot wiki. Wiki Spot is a non-profit organization that helps communities collaborate via wikis.