如下所示:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
# Helpers that turn raw payment logs (CSV) into per-shop daily payment counts.
#
# The input CSVs are expected to contain a column ``B`` (compared against shop
# ids below) and a column ``C`` (parsed as a timestamp).  Output CSVs have the
# day in column ``C`` and the number of rows seen on that day in column ``D``.

# NOTE: the star import is kept (and kept first) for backward compatibility:
# existing importers may rely on the NumPy names this module re-exports.  It
# also exports a function called ``copy`` and shadows builtins such as
# ``sum``/``min``/``max``, so the stdlib imports must come after it and the
# code below only uses explicit ``np.`` / ``pd.`` names.
from numpy import *  # noqa: F401,F403

import copy  # noqa: F401,E402 -- unused here; kept for existing importers
import os  # noqa: E402

import numpy as np  # noqa: E402
import pandas as pd  # noqa: E402

try:
    import matplotlib.pylab as plt  # noqa: F401,E402 -- unused here
except ImportError:  # plotting is optional; nothing below needs it
    plt = None

# Shop id that the single-shop helpers select by default.
_DEFAULT_SHOP_ID = 855
# Raw strings: the original literals relied on invalid escapes (``\p``, ``\d``)
# that happen to be kept verbatim; the resulting paths are unchanged.
_SHOP_TEMPLATE = r'D:\python\data\%s_pay.csv'
_RAW_TEMPLATE = r'D:\python\data2\%s_pay.csv'
_DAILY_TEMPLATE = r'D:\python\data3\%s_pay.csv'


def _read_shop_rows(filename):
    """Load only the ``B`` and ``C`` columns of the CSV *filename*."""
    return pd.read_csv(filename, usecols=['B', 'C'])


def _rows_by_shop(frame):
    """Return a ``{value of B: rows}`` mapping built in one pass over *frame*."""
    return {shop: rows for shop, rows in frame.groupby('B')}


def _daily_counts(rows):
    """Count the rows of *rows* per calendar day of column ``C``.

    Returns a Series named ``D`` indexed by day (index name ``C``); days with
    no rows between the first and last day are present with a count of 0.
    """
    stamps = pd.DatetimeIndex(pd.to_datetime(rows['C']).values, name='C')
    ones = pd.Series(1, index=stamps, name='D')
    # fillna keeps the result gap-free on pandas versions whose resampled sum
    # yields NaN for empty days.
    return ones.resample('D').sum().fillna(0)


def _append_csv(data, path):
    """Append *data* to the CSV *path*, writing the header only once."""
    is_new = not os.path.exists(path) or os.path.getsize(path) == 0
    data.to_csv(path, mode='a', header=is_new)


def _save_shop_daily_counts(filename, shop_id, savename):
    """Append the daily counts of *shop_id* found in *filename* to *savename*.

    Prints the shape of the selected rows; writes nothing when none match.
    """
    rows = _read_shop_rows(filename)
    rows = rows[rows['B'] == shop_id]
    print(rows.shape)
    if rows.empty:
        return
    _append_csv(_daily_counts(rows), savename)


def read(filename, shop_id=_DEFAULT_SHOP_ID, savename=None, chunkSize=1000000):
    """Stream *filename* in chunks and save one shop's daily payment counts.

    Args:
        filename: CSV file with columns ``B`` and ``C``.
        shop_id: value of ``B`` to select (default 855).
        savename: output CSV; defaults to ``'<shop_id>_pay.csv'``.
        chunkSize: number of rows read per chunk.

    The output file is overwritten.  Nothing is written when no row matches.
    """
    if savename is None:
        savename = '%s_pay.csv' % shop_id
    matches = []
    for chunk in pd.read_csv(filename, usecols=['B', 'C'], chunksize=chunkSize):
        matches.append(chunk[chunk['B'] == shop_id])
    print("Iteration is stopped.")
    if not matches:
        return
    # Count after concatenating so that a day split across two chunks is
    # reported once, and gaps between chunks are filled with 0.
    rows = pd.concat(matches, ignore_index=True)
    if rows.empty:
        return
    _daily_counts(rows).to_csv(savename, header=True)


def read2(filename, chunkSize=100000):
    """Read the CSV *filename* chunk by chunk and return it as one DataFrame."""
    chunks = list(pd.read_csv(filename, chunksize=chunkSize))
    print("Iteration is stopped.")
    return pd.concat(chunks, ignore_index=True)


def read3save(filename, shop_id=_DEFAULT_SHOP_ID, savename=None):
    """Append one shop's daily payment counts from *filename* to a CSV.

    Args:
        filename: CSV file with columns ``B`` and ``C``.
        shop_id: value of ``B`` to select (default 855).
        savename: output CSV, opened in append mode; defaults to
            ``'<shop_id>_pay.csv'``.
    """
    if savename is None:
        savename = '%s_pay.csv' % shop_id
    _save_shop_daily_counts(filename, shop_id, savename)


def loadDataSet(fileName, delim='\t'):
    """Parse a delimited text file of numbers into a float ``numpy.matrix``.

    Blank lines are skipped.  Raises ``ValueError`` if a field is not a number.
    """
    with open(fileName) as fr:
        datArr = [
            [float(field) for field in line.strip().split(delim)]
            for line in fr
            if line.strip()
        ]
    # ``mat`` was removed in NumPy 2.0; ``asmatrix`` exists in both 1.x and 2.x.
    return np.asmatrix(datArr)


def getShopData(shop_info='shopInfo.txt', pay_files=None,
                savename_template=_SHOP_TEMPLATE):
    """Write daily payment counts for every listed shop, one CSV per shop.

    Args:
        shop_info: text file with one integer shop id per line.
        pay_files: payment CSVs to scan; defaults to
            ``user_pay.001.001`` ... ``user_pay.001.008``.
        savename_template: ``%s`` template of the per-shop output path.
            NOTE(review): the original never defined the output path; the
            default reuses the directory used by ``tset`` -- confirm.

    Results from each payment file are appended to the shop's CSV.
    """
    with open(shop_info) as fr:
        shop_ids = [int(line) for line in fr if line.strip()]
    if pay_files is None:
        pay_files = ["user_pay.001.00%d" % i for i in range(1, 9)]

    for name in pay_files:
        frame = _read_shop_rows(name)
        by_shop = _rows_by_shop(frame)
        no_rows = frame.iloc[0:0]
        for shop in shop_ids:
            # Select from the full frame every time; narrowing one shared
            # variable would leave nothing for the second shop.
            rows = by_shop.get(shop, no_rows)
            print(rows.shape)
            if rows.empty:
                continue
            _append_csv(_daily_counts(rows), savename_template % str(shop))
    print("over")


def tset(filename, shop_id=_DEFAULT_SHOP_ID, savename=None):
    """Append one shop's daily payment counts from *filename* to a CSV.

    Same as ``read3save`` except for the default output path, which is
    ``D:\\python\\data\\my_pay.csv``.
    """
    if savename is None:
        savename = _SHOP_TEMPLATE % 'my'
    _save_shop_daily_counts(filename, shop_id, savename)


def getShopData2(filename, shop_ids=range(1, 2001),
                 savename_template=_RAW_TEMPLATE):
    """Split *filename* into one daily-count CSV per shop.

    Args:
        filename: CSV file with columns ``B`` and ``C``.
        shop_ids: values of ``B`` to export (default 1..2000).
        savename_template: ``%s`` template of the per-shop output path.

    Each shop id is printed as it is visited.  Shops without rows get no file.
    Output is appended; the ``C,D`` header is written only to a new file.
    """
    by_shop = _rows_by_shop(_read_shop_rows(filename))
    for i in shop_ids:
        print(i)
        rows = by_shop.get(i)
        if rows is None:
            continue
        _append_csv(_daily_counts(rows), savename_template % str(i))
    print("over")


def formatData(shop_ids=range(1, 2001), source_template=_RAW_TEMPLATE,
               savename_template=_DAILY_TEMPLATE):
    """Re-aggregate the per-shop CSVs into one gap-free row per day.

    Args:
        shop_ids: shop ids to process (default 1..2000).
        source_template: ``%s`` template of the input path (columns ``C``
            and ``D``, as written by ``getShopData2``).
        savename_template: ``%s`` template of the output path (overwritten).

    Each shop id is printed as it is visited.
    """
    for i in shop_ids:
        s = str(i)
        print(s)
        name = source_template % s
        if not os.path.exists(name):
            # getShopData2 writes no file for a shop without payments.
            continue
        dat = pd.read_csv(name)
        dat['C'] = pd.to_datetime(dat['C'])
        # Summing per day merges the blocks appended by separate runs.
        daily = dat.set_index('C').resample('D').sum().fillna(0)
        daily.to_csv(savename_template % s, mode='w')
    print("over")
以上这篇python pandas 对时间序列文件处理的实例就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持服务器之家。
原文链接:https://blog.csdn.net/Faith_yu/article/details/56009125