Rev Author Line No. Line
4928 kaklik 1 #!/usr/bin/python
2  
4929 kaklik 3 import pandas as pd
4931 kaklik 4 import sys
4929 kaklik 5 import os
6 import time
7 import datetime
4928 kaklik 8  
4929 kaklik 9 from pymlab import config
10 from mlabutils import ejson
4928 kaklik 11  
# Parser for the extended-JSON configuration file given on the command line.
parser = ejson.Parser()

#### Script Arguments ###############################################

# Exactly one argument (the configuration file) is accepted; anything else
# prints a usage hint on stderr and terminates with exit status 1.
if len(sys.argv) != 2:
    for message in (
        "Invalid number of arguments.\n",
        "Usage: %s CONFIG_FILE\n" % (sys.argv[0], ),
    ):
        sys.stderr.write(message)
    sys.exit(1)

# Read the configuration and pull out the four settings used by the rest of
# the script: source directory, archive root, upload directory and the
# station name that is embedded in the name of the generated file.
value = parser.parse_file(sys.argv[1])
dataSource, dataArchive, dataUpload, stationName = (
    value[key]
    for key in ('data_path', 'data_archive', 'data_upload', 'origin')
)
26  
loop = 1  # ordinal of the next processing pass; printed as a progress marker


def _file_day(name):
    """Return the calendar day encoded in the first 8 characters of *name*.

    The prefix is parsed as YYYYMMDD.  ValueError is raised when the prefix
    is not a valid date; the main loop below catches it.
    """
    return datetime.datetime.strptime(name[:8], "%Y%m%d").date()


# Main loop: whenever the source directory holds data files from more than
# one day, the oldest day is considered finished.  For every file of that day
# one row of column means (rounded to 3 decimals) is appended to the daily
# "*_data_mean.csv" file in the upload directory, and the processed files are
# moved into the archive under YEAR/MONTH/DAY/.
#
# NOTE: dataSource, dataArchive and dataUpload are joined with file names by
# plain string concatenation, so they have to end with a path separator.
while True:
    flag = False  # becomes True only when a day was processed in this pass
    try:
        print("Start")

        # Sorted names of all "*data.csv" files waiting in the source directory.
        listOfDataFiles = [
            name for name in sorted(os.listdir(dataSource))
            if name.endswith("data.csv")
        ]

        # At least two files are needed to compare the oldest with the newest.
        if len(listOfDataFiles) >= 2:
            first = listOfDataFiles[0]
            last = listOfDataFiles[-1]
            firstDay = _file_day(first)

            # The newest file belongs to a later day than the oldest one, so
            # the oldest day is complete and can be averaged.
            if _file_day(last) > firstDay:
                print("Computing...")
                print(loop)
                loop += 1

                # All data files measured on the same day as the oldest file.
                listOfSpecDataFiles = [
                    name for name in listOfDataFiles
                    if _file_day(name) == firstDay
                ]

                filename = dataUpload + first[:8] + '000000_' + stationName + '_data_mean.csv'
                for name in listOfSpecDataFiles:
                    df = pd.read_csv(dataSource + name, sep=';', header=None)
                    # header=None labels the columns 0..n-1, so they can be
                    # addressed by position; one mean per column.
                    means = [
                        round(df[column].mean(), 3)
                        for column in range(df.shape[1])
                    ]
                    # to_csv opens the file in append mode itself and creates
                    # it when missing; no separate file handle is needed.
                    pd.DataFrame([means]).to_csv(
                        filename, sep=';', header=False, index=False, mode='a')

                # Move the processed files into the archive structure.
                for name in listOfSpecDataFiles:
                    directory = dataArchive + name[:4] + "/" + name[4:6] + "/" + name[6:8] + "/"
                    if not os.path.exists(directory):
                        os.makedirs(directory)
                    os.rename(dataSource + name, directory + name)

                # Work was done; loop again immediately, another finished day
                # may already be waiting.
                flag = True

    except ValueError as error:
        # Typically a file name whose prefix is not a YYYYMMDD date.  Report
        # the actual error and fall through to the sleep below so that a
        # persistent problem does not turn into a busy loop.
        print(error)
        flag = False

    if not flag:
        time.sleep(10)  # long sleep, because there is nothing to process
95 print ValueError