<> background

python use pandas Library or other tool processing excel There's an embarrassing question about the document , It's preserved excel There is little typography in the document , Like this :

Each column is the same width , Some column contents that are too long will overflow the cell directly , It's not beautiful . A small number of tables can also patiently adjust the column width manually , It is troublesome to deal with too many forms . So I thought of a way to solve this problem .

<> thinking

The key to this problem is to get the width of each column , And adjust it . Usually, we only need to get the maximum length of each column to achieve the goal . The length here should not be the length of the string , It's a string of characters in the excel Length in table . After observation, it can be found that , A Chinese character “ length ” About 2, The length of a letter or number is 1. Because most of the contents are composed of Chinese characters, letters and numbers , So knowing this is enough to complete my program , It's used here ord Function to get the ASCII Value to simply determine whether it is a Chinese character .
Core code :
for elem in str_list: elem_split = list(elem) length = 0 for c in elem_split:
if ord(c) <= 256: length += 1 else: length += 2 len_list.append(length)
<> Complete implementation

Of course, adjusting the column width is not enough , I also want to be able to wrap when the string is too long , merge cell , Coloring and other functions , So I finally encapsulated it as a class , Various functions can be realized according to different requirements .
include : Adaptive column width when saving files , add to sheet Instead of covering the original xlsx file , Merge cells with the same content, etc

* 2020-04-18 code update # -*- coding:utf8 -*- from openpyxl.styles import Alignment
from openpyxl.styles import PatternFill from openpyxl.styles import Font from
openpyxlimport Workbook, load_workbook from os import remove, path class
XlsxSaver: """ A general DataFrame Convert to format excel Tools for """ def __init__(self, df_in, filename
='a.xlsx', sheet_name='Sheet1'): """ df_in : From a DataFrame Object to get the table content filename :
file name sheet_name : Table name """ self.filename = filename # Preserved xlsx Name of the document self.user_def =
[] # The column name of a user-defined column , These columns are no longer involved in the automatic calculation of column widths if path.exists(filename): #
If the file exists , Just open it , add to Sheet self.wb = load_workbook(filename) self.sheet = self.wb.
create_sheet(sheet_name) else: # If the file does not exist , Just create the form self.wb = Workbook() self.sheet =
self.wb.active self.sheet.title = sheet_name # take df Copy to sheet self.df = df_in.
copy() self.sheet.append(list(self.df.columns)) for row in range(0, len(list(
self.df.index))): for col in range(0, len(list(self.df.columns))): self.sheet.
cell(row+2, col+1).value = self.df.iloc[row, col] # be careful :sheet Line up from 1 Start counting def
remove_file(self): remove(self.filename) def set_sheet_name(self, sheet_name):
self.sheet.title = sheet_name def set_filename(self, filename): self.filename =
filenamedef get_maxlength(self, series_in, col): """
Gets a type of object Of Series Maximum footprint length in , Used to determine the exported xlsx The column width of the file col : Header , Also participate in the comparison , Solve the problem that sometimes the header is too long """
series= series_in.fillna('-') # Fill in null values , Prevention nan str_list = list(series) len_list =
[] for elem in str_list + [col]: elem_split = list(elem) length = 0 for c in
elem_split: if ord(c) <= 256: length += 1 else: length += 2 len_list.append(
length) return max(len_list) def __auto_width(self): cols_list = list(self.df.
columns) # Get column name for i in range(0, len(cols_list)): col = cols_list[i] if col in
self.user_def: continue self.sheet.cell(1, i+1).font = Font(bold=True) # Bold meter
letter= chr(i+65) # from ASCII Value to get the corresponding column letter max_len = self.get_maxlength(self.df[col].
astype(str), col) if max_len <= 12: self.sheet.column_dimensions[letter].width =
12 elif max_len <= 50: self.sheet.column_dimensions[letter].width = max_len + 2
else: self.sheet.column_dimensions[letter].width = 50 for cell in self.sheet[
letter]: cell.alignment = Alignment(wrap_text=True) def set_width(self, col_name
, width): # Provides an interface for adjusting column width index = list(self.df.columns).index(col_name) letter = chr
(index+65) self.sheet.column_dimensions[letter].width = width self.user_def.
append(col_name) def set_color(self, col_name, color, rule): #
Provides an interface for setting colors ,rule: Regular function index = list(self.df.columns).index(col_name) letter = chr(
index+65) for cell in self.sheet[letter]: if rule(cell.value): cell.fill =
PatternFill(fill_type="solid", start_color=color, end_color=color) def
set_center_alignment(self, col_name): index = list(self.df.columns).index(
col_name) letter = chr(index+65) for cell in self.sheet[letter]: cell.alignment
= Alignment(wrap_text=True, horizontal='center') def save(self): # Auto adjust column width , And keep it
self.__auto_width() self.wb.save(self.filename) def set_merge(self, col_name):
self.user_def.append(col_name) # Set to custom column # Set a column to merge cells index = list(self.df.
columns).index(col_name) letter = chr(index+65) i = 1 while True: if i >= self.
sheet.max_row: # End condition : Cell to bottom break cell = self.sheet[letter+str(i)] j = i+1 #
The first step points to the next cell while True: # The process is right j Try it out , final j The cells that point to are and i The last one in a row cell_next = self.
sheet[letter+str(j)] if cell_next.value != cell.value: j -= 1 break else: j += 1
if j > self.sheet.max_row: j -= 1 break if j-i >= 1 and cell.value != '' and
cell.value: # If there are more than two consecutive cells, the contents are the same , Integration msg = '%s%d:%s%d'%(letter,i,letter,j) self.
sheet.merge_cells(msg) # Control the format self.sheet[letter+str(i)].alignment = Alignment(
horizontal='center', vertical='top', wrap_text=True) i = j+1 # Continue to point to the next cell
Examples of use
df = pd.read_sql(SQL, conn) # Get a table from the database xlsx = XlsxSaver(df, 'output.xlsx',
'sheet1') # Initializing an object , Set the saved file name and table name xlsx.set_width('col1', 30) # Manually specify the column width of a column
xlsx.set_merge('col2') # set up col2 Columns automatically merge adjacent identical cells xlsx.save() # Storage ,
In addition to col2 The column width was manually specified , Other columns perform adaptive column widths
notes : If there are no special requirements for the format, there is no need to spend so much time , You can use the following functions
def save_as_excel(df,xlsx_name='a.xlsx',sheet_name='sheet1',startrow=0,startcol
=0): """ The function is to save a df The object is excel file (xlsx) df: To save dataFrame object xlsx_name: Name to save , Pay attention to the suffix
sheet_name: The name of the table to save """ writer=pd.ExcelWriter(xlsx_name) df.to_excel(writer,
sheet_name= sheet_name,startrow=startrow,startcol=startcol,index=False) writer.
save()# This line needs to be added , Otherwise, the file cannot be generated

Technology