หากคุณใช้ linux คุณสามารถใช้ grep
import pandas as pd
from time import time
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
def zgrep_data(f, string):
'''grep multiple items f is filepath, string is what you are filtering for'''
grep = 'grep'
print('{} for {} from {}'.format(grep,string,f))
start_time = time()
if string == '':
out = subprocess.check_output([grep, string, f])
grep_data = StringIO(out)
data = pd.read_csv(grep_data, sep=',', header=0)
else:
columns = pd.read_csv(f, sep=',', nrows=1, header=None).values.tolist()[0]
out = subprocess.check_output([grep, string, f])
grep_data = StringIO(out)
data = pd.read_csv(grep_data, sep=',', names=columns, header=None)
print('{} finished for {} - {} seconds'.format(grep,f,time()-start_time))
return data
chunk['filed']>constant
ฉันสามารถ sandwich มันระหว่าง 2 ค่าคงที่? เช่น: constant1> chunk ['field']> constant2. หรือใช้ 'in range' ได้?