1. Problem

The requirement: given a CSV file of WeChat chat records, remove the records in which the same person sent the same message more than once on the same day, keeping only the first occurrence.

2. Implementation code
# -*- coding: utf-8 -*-
"""Remove same-day duplicate messages from chat-record CSV files."""
import os

import pandas as pd

try:
    from tqdm import tqdm
except ImportError:  # the progress bar is cosmetic; fall back to a plain loop
    def tqdm(iterable, *args, **kwargs):
        """Stand-in used when tqdm is not installed: return the iterable as is."""
        return iterable


class DeleteRepetition:
    """Drop messages repeated by the same sender on the same day.

    Two rows count as duplicates when their ID column, message column and the
    day part of their time column are all equal. The first occurrence is kept.
    """

    def __init__(self, finished_filedir='newdata', id_column='ID',
                 time_column=' time ', message_column=' news '):
        """Store the output location and the names of the key columns.

        :param finished_filedir: directory prefix under which cleaned files
            are written
        :param id_column: name of the sender-ID column
        :param time_column: name of the timestamp column
        :param message_column: name of the message-text column

        NOTE(review): the default names ``' time '`` and ``' news '`` include
        surrounding spaces exactly as they appear in the original script;
        confirm them against the real CSV header and override if needed.
        """
        self.finished_filedir = finished_filedir
        self.id_column = id_column
        self.time_column = time_column
        self.message_column = message_column

    def deleteCsvRepetition(self, filename):
        """Clean one CSV file and write the result under the output directory.

        The result is written to ``finished_filedir + '/' + filename``; any
        missing parent directories of that path are created first.

        :param filename: path of the CSV file to clean
        :return: None
        :raises FileNotFoundError: if ``filename`` does not exist. Other read
            or parse errors propagate unchanged so their cause is not hidden.
        """
        try:
            csvfile = pd.read_csv(filename, encoding="utf-8")
        except FileNotFoundError as err:
            raise FileNotFoundError(
                'file is not found! (%s)' % filename) from err

        # The original author notes that the first data row holds no data,
        # so it is always skipped.
        csvfile = csvfile[1:]

        # Day granularity: keep only the text before the first space of the
        # timestamp. `.str[0]` (unlike `expand=True`) also works on an empty
        # column.
        day = csvfile[self.time_column].str.split(' ').str[0]

        # Detect duplicates on a temporary frame so the original timestamps
        # never have to be overwritten and restored.
        is_repeat = csvfile.assign(**{self.time_column: day}).duplicated(
            subset=[self.id_column, self.time_column, self.message_column],
            keep='first',
        )
        csvfile = csvfile[~is_repeat]

        # `filename` may itself contain directories (the batch method passes
        # "<filedir>/<name>"), so make sure the destination folder exists.
        target = self.finished_filedir + '/' + filename
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)
        csvfile.to_csv(target, index=False)

    def deleteCsvsRepetition(self, filedir):
        """Clean every CSV file directly inside ``filedir``.

        Entries that are not regular files ending in ``.csv`` are skipped.
        Files are processed in sorted name order.

        :param filedir: folder containing the CSV files
        :return: None
        """
        filenames_csv = sorted(
            name for name in os.listdir(filedir)
            if name.lower().endswith('.csv')
            and os.path.isfile(os.path.join(filedir, name))
        )
        for filename in tqdm(filenames_csv):
            self.deleteCsvRepetition(filedir + '/' + filename)


if __name__ == '__main__':
    cleaner = DeleteRepetition()
    cleaner.deleteCsvsRepetition('20190311_0408')

Technology
©2020 ioDraw All rights reserved
Self made whole person computer program PHP call shell command python Simple record of network programming layui.table Examples of dynamically getting header and list data Big data environment --- data warehouse (hive+mysql+hadoop) The construction of What are the types of variables ?MYSQL database DML Common commands 《 From machine learning to deep learning 》 note (2) Unsupervised learning log4j Method of printing exception stack information Android Development — Display food information according to customer budget