#!/usr/bin/python
"""Average finished days of station measurements and archive the raw files.

Usage: SCRIPT CONFIG_FILE

The configuration file is parsed with ``mlabutils.ejson`` and must provide
the keys ``data_path``, ``data_archive``, ``data_upload`` and ``origin``.
The three path values are concatenated verbatim with file names below, so
they are expected to end with a path separator.

The script loops forever.  On each pass it lists the ``*data.csv`` files in
``data_path``.  When the newest file belongs to a later day than the oldest
one, every file of the oldest day is reduced to one row of per-column means
(rounded to 3 decimals) appended to
``<data_upload><YYYYMMDD>000000_<origin>_data_mean.csv``, and the processed
files are moved to ``<data_archive>YYYY/MM/DD/``.
"""

import pandas as pd
import sys
import os
import time
import datetime

from pymlab import config
from mlabutils import ejson

parser = ejson.Parser()

#### Script Arguments ###############################################

if len(sys.argv) != 2:
    sys.stderr.write("Invalid number of arguments.\n")
    sys.stderr.write("Usage: %s CONFIG_FILE\n" % (sys.argv[0], ))
    sys.exit(1)

value = parser.parse_file(sys.argv[1])
dataSource = value['data_path']
dataArchive = value['data_archive']
dataUpload = value['data_upload']
stationName = value['origin']

loop = 1  # counts processed days, printed for progress tracking


def _file_day(name):
    """Return the date encoded in the first 8 characters (YYYYMMDD) of *name*.

    Raises ValueError when the prefix is not a valid date.
    """
    return datetime.datetime.strptime(name[:8], "%Y%m%d").date()


while True:
    try:
        print("Start")
        flag = False  # becomes True when a finished day was processed in this pass

        ## Create sorted list of csv files
        # os.listdir() returns names in arbitrary order; sorting the
        # date-prefixed names puts the oldest file first.
        listOfDataFiles = [name for name in sorted(os.listdir(dataSource))
                           if name.endswith("data.csv")]

        ## Compare the oldest and the newest file. If they come from different
        ## days, the oldest day is complete and its files can be averaged.
        if len(listOfDataFiles) >= 2:  # at least two data files are needed
            first = listOfDataFiles[0]
            last = listOfDataFiles[-1]
            firstDay = _file_day(first)

            if _file_day(last) > firstDay:  # the last file is newer than the first
                flag = True
                print("Computing...")
                print(loop)
                loop += 1

                # all data files measured on the same day as the oldest one
                listOfSpecDataFiles = [name for name in listOfDataFiles
                                       if _file_day(name) == firstDay]

                filename = dataUpload + first[:8] + '000000_' + stationName + '_data_mean.csv'
                for name in listOfSpecDataFiles:
                    df = pd.read_csv(dataSource + name, sep=';', header=None)  # read current csv
                    # mean of every column, rounded to 3 decimals
                    means = [round(df[column].mean(), 3) for column in df.columns]
                    # append one row per input file; to_csv opens the path itself
                    pd.DataFrame([means]).to_csv(filename, sep=';', header=False,
                                                 index=False, mode='a')

                # move files to archive structure
                for name in listOfSpecDataFiles:
                    directory = dataArchive + name[:4] + "/" + name[4:6] + "/" + name[6:8] + "/"
                    if not os.path.exists(directory):
                        os.makedirs(directory)
                    os.rename(dataSource + name, directory + name)  # move file

        if not flag:
            time.sleep(10)  # long sleep, because there is nothing to process

    except ValueError as err:
        # Report the actual error and pause, so that a persistent problem
        # (e.g. a file name without a valid date prefix) does not turn the
        # loop into a busy spin.
        sys.stderr.write("ValueError: %s\n" % (err, ))
        time.sleep(10)