Saturday, 31 August 2013

How can I make the following code run faster?

How can I make the following code run faster?

Consider the code below, in which I read an input file with 6 columns (0-5).
I initialize a variable history_ends to 5000.
Next, while the column 0 value (i.e. job[0]) is less than 5000, I add the
line to a list (historyjobs) - about 5000 lines of the input file; the rest
of the lines, up to EOF, go into another list (targetjobs).
Next, for every entry in historyjobs whose item3, item4 and item5 are equal
to item3, item4 and item5 of the first entry in targetjobs, I add item1 of
that historyjobs entry to the list listsub.
Next, I find the running mean of the items in listsub, reverse the list and
store it in list a. I then check whether the items in listsub are greater
than a * 0.9; the items that satisfy this condition are stored in the list
condsub.
Next, I reopen the input file and check whether column 0 is equal to one of
the items in condsub; if it is, I add column 1 to a list condrun.
Finally, I open the output file and write: in column 0, the second item of
the first entry in targetjobs (i.e. j); in column 1, the average of the list
condrun; in column 2, (j - avg) / j; in column 3, the maximum item in
condrun; in column 4, the minimum item in condrun; and in column 5, the
length of condrun. The last four columns are based on a condition.
Lastly, I repeat the whole procedure in a while loop, assigning the
variable history_ends the next item, int(targetjobs[1][0]).
from __future__ import division

import itertools
# Module-level state shared by the functions below.
history_begins = 1
history_ends = 5000  # rows whose column 0 is below this go to historyjobs
n = 0
total = 0

historyjobs = []  # rows with column 0 < history_ends
targetjobs = []  # all remaining rows

listsub = []
listrun = []
listavg = []
F = []
condsub = []
condrun = []
mlistsub = []
a = []
def check(inputfile):
    """Split the rows of *inputfile* into history rows and target rows.

    Every line is split on whitespace. Rows whose column 0 is below the
    module-level ``history_ends`` are appended to ``historyjobs``; all other
    rows are appended to ``targetjobs``. Afterwards, column 1 of every
    history row whose columns 3, 4 and 5 equal (as integers) those of the
    first target row is appended to ``listsub``.

    Nothing is returned; the three module-level lists are extended in place
    and are not cleared here.
    """
    # ``with`` closes the handle (the original never did) and iterating the
    # file object avoids holding a second copy of every line in memory.
    with open(inputfile, 'r') as f:
        for line in f:
            job = line.split()
            if not job:
                continue  # blank line: there is no column 0 to classify
            if int(job[0]) < history_ends:
                historyjobs.append(job)
            else:
                targetjobs.append(job)

    if not targetjobs:
        return  # no target row, so there is nothing to compare against

    # Convert the reference columns once instead of once per history row.
    first_target = targetjobs[0]
    want3 = int(first_target[3])
    want4 = int(first_target[4])
    want5 = int(first_target[5])
    for job in historyjobs:
        if (int(job[3]) == want3 and int(job[4]) == want4
                and int(job[5]) == want5):
            listsub.append(job[1])
def runningMean(iterable):
    """A generator, yielding a cumulative average of its input.

    The n-th value produced is the sum of the first n items of *iterable*
    divided by n. An empty input yields nothing.
    """
    running_total = 0
    # Counting from 1 makes ``count`` the number of items summed so far.
    for count, value in enumerate(iterable, 1):
        running_total += value
        yield running_total / count
def newfun(results):
    """Collect the values of *results*, last pair first.

    *results* is a sequence of ``(value, average)`` pairs. The first element
    of each pair is appended to the module-level list ``a``, walking the
    pairs from the end to the start, and ``a`` is returned. ``a`` is not
    cleared here, so it keeps whatever it already held.
    """
    # ``reversed`` walks the pairs backwards without reordering the caller's
    # list in place, which ``results.reverse()`` did.
    a.extend(value for value, _average in reversed(results))
    return a
def runcheck(subseq):
    """Collect column 2 of the input rows whose column 1 occurs in *subseq*.

    The file ``newfileinput`` is read line by line. For every row whose
    column 1 equals (as an integer) an element of *subseq*, column 2 is
    appended to the module-level list ``condrun`` once per matching element,
    so duplicates in *subseq* produce duplicate entries. ``condrun`` is
    returned. When *subseq* is empty nothing can match and the file is not
    opened.
    """
    # Tally the wanted values up front so each row costs one dictionary
    # lookup instead of a scan over the whole of ``subseq``.
    wanted = {}
    for value in subseq:
        key = int(value)
        wanted[key] = wanted.get(key, 0) + 1
    if not wanted:
        return condrun

    with open('newfileinput', 'r') as f:
        for line in f:
            job = line.split()
            if not job:
                continue  # blank line: no columns to compare
            repeats = wanted.get(int(job[1]), 0)
            if repeats:
                condrun.extend([str(job[2])] * repeats)
    return condrun
def _format_report_line(j, targetsub, s, convertsub):
    """Build the tab-separated report line for one target row.

    Raises ZeroDivisionError when ``s`` is empty or ``j`` is zero.
    """
    mean = sum(s) / len(s)
    rel_error = abs(j - mean) / j
    fields = [
        j,
        round(mean, 2),
        round(rel_error, 3),
        max(s),
        min(s),
        len(s),
        abs(convertsub - targetsub),
    ]
    verdicts = ['good' if rel_error < limit else 'bad'
                for limit in (0.20, 0.30, 0.40, 0.50)]
    # Numeric fields are separated by two tabs, the verdicts by one.
    return ('\t\t'.join(str(field) for field in fields)
            + '\t\t' + '\t'.join(verdicts) + '\n')


def listcreate(condrun, condsub):
    """Append one report line for the first target row to ``outputfile``.

    The line holds, in order: column 2 of the first row in the module-level
    ``targetjobs`` (``j``); the mean of the integer values in *condrun*,
    rounded to 2 places; ``abs(j - mean) / j`` rounded to 3 places; the
    maximum, minimum and count of *condrun*; the absolute difference between
    the last item of *condsub* and column 1 of the first target row; and
    four ``good``/``bad`` flags telling whether the relative error is below
    0.20, 0.30, 0.40 and 0.50.

    Prints ``0`` and writes nothing when *condsub* is empty. Prints
    ``dem 0`` and writes nothing when the computation divides by zero
    (empty *condrun* or ``j == 0``).
    """
    # ``with`` closes the file even if a conversion below raises.
    with open('outputfile', 'a') as f1:
        s = [int(value) for value in condrun]
        j = int(targetjobs[0][2])
        targetsub = int(targetjobs[0][1])

        # NOTE(review): the original text had lost its indentation, so the
        # trailing ``else: print '0'`` is read here as belonging to the
        # ``condsub`` test rather than to the ``try`` - confirm.
        if not condsub:
            print('0')
            return

        try:
            line = _format_report_line(j, targetsub, s, int(condsub[-1]))
        except ZeroDivisionError:
            print('dem 0')
        else:
            # One write per record instead of one per field and separator.
            f1.write(line)
def new():
    """Repeat the analysis, moving ``history_ends`` forward each pass.

    Each pass calls ``check('newfileinput')``, moves ``history_ends`` to
    column 0 of the second target row, selects the leading run of
    newest-first ``listsub`` values that exceed 0.9 times their running
    mean, and passes that selection through ``newfun``, ``runcheck`` and
    ``listcreate``. The shared lists are then emptied for the next pass.
    The loop stops as soon as fewer than two target rows are found.
    """
    global history_ends
    # ``izip`` exists only on Python 2; on Python 3 the built-in ``zip`` is
    # already lazy.
    lazy_zip = getattr(itertools, 'izip', zip)

    # NOTE(review): every pass re-reads the input file in ``check`` and
    # again in ``runcheck``; that repeated I/O looks like the main cost.
    while True:
        check('newfileinput')
        # ``< 2`` instead of ``!= 1``: with no target rows at all the
        # lookup of ``targetjobs[1]`` below would raise IndexError.
        if len(targetjobs) < 2:
            break
        history_ends = int(targetjobs[1][0])

        newest_first = [int(value) for value in listsub][::-1]
        results = list(itertools.takewhile(
            lambda pair: pair[0] > 0.9 * pair[1],
            lazy_zip(newest_first, runningMean(newest_first))))

        condsub = newfun(results)
        condrun = runcheck(condsub)
        listcreate(condrun, condsub)

        # Empty the lists in place; the helper functions hold on to these
        # same list objects between passes.
        for shared in (condrun, condsub, listsub, targetjobs, historyjobs):
            del shared[:]
def main():
    """Script entry point: run the processing loop in ``new``."""
    new()


if __name__ == '__main__':
    main()
The sample input file (the whole file contains 200,000 lines):
1 0 9227 1152 34 2
2 111 7622 1120 34 2
3 68486 710 1024 14 2
6 265065 3389 800 22 2
7 393152 48438 64 132 3
8 412251 46744 64 132 3
9 430593 50866 256 95 4
10 430730 10770 256 95 4
11 433750 12701 256 14 3
12 437926 2794 64 34 2
13 440070 43 32 96 3
The sample output file contents:
930 1389.14 0.494 3625 977 7 15 bad bad bad good
4348 1331.75 0.694 3625 930 8 164 bad bad bad bad
18047 32237.0 0.786 61465 17285 3 325774 bad bad bad bad
1607 1509.0 0.061 1509 1509 1 6508 good good good good
304 40.06 0.868 80 32 35 53472 bad bad bad bad
7246 7247.0 0.0 7247 7247 1 9691 good good good good
95 1558.0 15.4 1607 1509 2 2148 bad bad bad bad
55 54.33 0.012 56 53 3 448142 good good good good
31 76.38 1.464 392 35 13 237152 bad bad bad bad
207 55.0 0.734 55 55 1 370 bad bad bad bad
If anyone could suggest some changes that would make the code run faster,
it would be helpful.

No comments:

Post a Comment