Python操作Excel终极指南:告别手动复制粘贴的噩梦
说实话,作为程序员,我最怕听到的不是"需求又改了",而是——
"帮我把这几十个Excel表的数据合并一下"
"把这两列数据提取出来做个汇总"
"给每个sheet加个格式,要好看的那种"
然后我就打开了Excel,开始Ctrl+C、Ctrl+V、拖拽、筛选、复制……半小时过去了,手都酸了,眼睛也花了。
停!你是程序员啊!这种重复性工作不应该交给代码吗?今天这篇文章,就把 Python 操作 Excel 的全套技能给你讲透。看完你就能把这些脏活累活全甩给脚本,自己泡杯茶看着它跑就行。
为什么选 openpyxl?
Python 操作 Excel 的库不少,但真正好用的就这几个:
| 库名 | 读 | 写 | 格式 | 适用场景 |
|---|---|---|---|---|
| openpyxl | ✅ | ✅ | ✅ 样式/图表/公式 | 首选,.xlsx 文件全能选手 |
| pandas | ✅ | ❌ 弱 | ❌ | 数据分析,批量读写快 |
| xlrd/xlwt | ✅ | ✅ | ❌ | 老项目 .xls 格式 |
| xlsxwriter | ❌ | ✅ | ✅ | 只写不读,生成报表用 |
先装一下:
pip install openpyxl场景一:批量读取数据(再也不用手动翻表了)
基础读取
from openpyxl import load_workbook
# 打开已有的Excel文件
wb = load_workbook('数据表.xlsx')
# 查看所有sheet名称
print(wb.sheetnames) # ['Sheet1', 'Sheet2', '汇总']
# 选择一个sheet
ws = wb['Sheet1']
# 读取单个单元格(三种方式)
print(ws['A1'].value) # 用坐标
print(ws.cell(row=1, column=1).value) # 用行列号
# 读取一行
for cell in ws[1]:
print(cell.value, end=' ')
# 输出:姓名 年龄 部门 薪资
# 读取所有数据(常用!)
data = []
for row in ws.iter_rows(min_row=2, values_only=True): # 从第2行开始,跳过表头
data.append(row)
print(data)
# [('张三', 28, '研发部', 15000), ('李四', 32, '产品部', 18000), ...]按条件筛选数据
from openpyxl import load_workbook
wb = load_workbook('员工表.xlsx')
ws = wb.active
# 筛选薪资大于15000的员工
high_salary = []
for row in ws.iter_rows(min_row=2, values_only=True):
name, age, dept, salary = row
if salary and salary > 15000:
high_salary.append({
'姓名': name,
'部门': dept,
'薪资': salary
})
for emp in high_salary:
print(f"{emp['姓名']} - {emp['部门']} - ¥{emp['薪资']}"):以前翻半小时的表,现在3行代码搞定。
场景二:创建和写入Excel(从零生成报表)
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
# 创建新的工作簿
wb = Workbook()
ws = wb.active
ws.title = '销售报表'
# ====== 写入数据 ======
headers = ['产品名称', '销量', '单价', '销售额']
data = [
['手机A', 1200, 2999],
['手机B', 800, 3999],
['耳机X', 3500, 299],
['充电宝Y', 2100, 89],
]
# 写入表头
for col, header in enumerate(headers, start=1):
cell = ws.cell(row=1, column=col, value=header)
# 写入数据并计算销售额
for row_idx, row_data in enumerate(data, start=2):
for col_idx, value in enumerate(row_data, start=1):
ws.cell(row=row_idx, column=col_idx, value=value)
# 销售额 = 销量 × 单价(写入公式)
ws.cell(row=row_idx, column=4, value=f'=B{row_idx}*C{row_idx}')
# ====== 设置格式(让表格好看!)=====
# 表头样式:加粗、白色字、深蓝背景
header_font = Font(bold=True, color='FFFFFF', size=12)
header_fill = PatternFill(start_color='366092', end_color='366092', fill_type='solid')
header_align = Alignment(horizontal='center', vertical='center')
thin_border = Border(
left=Side(style='thin'),
right=Side(style='thin'),
top=Side(style='thin'),
bottom=Side(style='thin')
)
# 应用表头样式
for col in range(1, 5):
cell = ws.cell(row=1, column=col)
cell.font = header_font
cell.fill = header_fill
cell.alignment = header_align
cell.border = thin_border
# 数据区域样式:居中、边框
data_align = Alignment(horizontal='center', vertical='center')
alt_fill = PatternFill(start_color='F0F8FF', end_color='F0F8FF', fill_type='solid') # 隔行变色
for row in range(2, len(data) + 2):
for col in range(1, 5):
cell = ws.cell(row=row, column=col)
cell.alignment = data_align
cell.border = thin_border
if row % 2 == 0:
cell.fill = alt_fill
# 数字格式化:货币
for row in range(2, len(data) + 2):
ws.cell(row=row, column=3).number_format = '¥#,##0' # 单价
ws.cell(row=row, column=4).number_format = '¥#,##0' # 销售额
# ====== 冻结首行 + 列宽自适应 ======
ws.freeze_panes = 'A2' # 冻结第一行,滚动时表头不动
ws.column_dimensions['A'].width = 15
ws.column_dimensions['B'].width = 10
ws.column_dimensions['C'].width = 10
ws.column_dimensions['D'].width = 12
# 保存
wb.save('销售报表.xlsx')
print('✅ 报表已生成:销售报表.xlsx'):一个带样式、有公式、能直接发给老板看的Excel报表。
场景三:合并多个Excel文件(HR和财务最爱)
这个需求太常见了——每个月各部门交上来的表格,你要合并成一张总表。
from openpyxl import load_workbook, Workbook
import os
import glob
def merge_excels(file_pattern, output_file, has_header=True):
"""
合并多个Excel文件到一个工作簿
:param file_pattern: 文件通配符,如 'sales/*.xlsx'
:param output_file: 输出文件名
:param has_header: 是否保留表头(只保留第一个文件的)
"""
files = glob.glob(file_pattern)
if not files:
print(f'❌ 没有找到匹配的文件: {file_pattern}')
return
wb_out = Workbook()
ws_out = wb_out.active
ws_out.title = '合并结果'
header_written = False
for idx, file_path in enumerate(files):
print(f'📂 正在处理: {os.path.basename(file_path)}')
try:
wb_in = load_workbook(file_path)
ws_in = wb_in.active
# 决定是否跳过表头
start_row = 1 if (not header_written or not has_header) else 2
for row in ws_in.iter_rows(min_row=start_row, values_only=True):
ws_out.append(list(row))
if has_header and not header_written:
header_written = True
wb_in.close()
except Exception as e:
print(f'⚠️ 处理 {os.path.basename(file_path)} 出错: {e}')
wb_out.save(output_file)
print(f'\n✅ 合并完成!共 {len(files)} 个文件 → {output_file}')
print(f'📊 总计 {ws_out.max_row - 1} 条数据(不含表头)')
# 使用示例
merge_excels(
file_pattern='月报/*.xlsx',
output_file='月报汇总.xlsx',
has_header=True
):50个部门的Excel,30秒合并完毕。
场景四:拆分大表为多个小表
反过来,有时候你需要把一个大表按某个字段拆分成多个小文件:
from openpyxl import load_workbook, Workbook
from collections import defaultdict
def split_by_column(input_file, column_index, output_dir):
"""
按指定列拆分Excel
:param input_file: 输入文件
:param column_index: 拆分依据的列号(从1开始)
:param output_dir: 输出目录
"""
os.makedirs(output_dir, exist_ok=True)
wb_in = load_workbook(input_file)
ws_in = wb_in.active
# 按列值分组
groups = defaultdict(list)
headers = None
for row_idx, row in enumerate(ws_in.iter_rows(values_only=True)):
if row_idx == 0:
headers = list(row)
continue
key = str(row[column_index - 1]) if row[column_index - 1] else '未分类'
groups[key].append(list(row))
# 每个分组保存为一个文件
for key, rows in groups.items():
wb_out = Workbook()
ws_out = wb_out.active
ws_out.title = key[:31] # sheet名最长31字符
if headers:
ws_out.append(headers)
for row in rows:
ws_out.append(row)
safe_name = ''.join(c for c in key if c.isalnum() or c in '_-') or 'unknown'
output_path = os.path.join(output_dir, f'{safe_name}.xlsx')
wb_out.save(output_path)
print(f'✅ {key}: {len(rows)}条 → {output_path}')
print(f'\n📊 共拆分出 {len(groups)} 个文件')
# 使用示例:按部门拆分员工表
split_by_column('全员名单.xlsx', column_index=3, output_dir='按部门/')场景五:对比两个表的差异
财务对账、版本对比,这个功能救过我不止一次命:
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
RED = PatternFill(start_color='FFCCCC', end_color='FFCCCC', fill_type='solid')
GREEN = PatternFill(start_color='CCFFCC', end_color='CCFFCC', fill_type='solid')
YELLOW = PatternFill(start_color='FFFFCC', end_color='FFFFCC', fill_type='solid')
def compare_excels(file_a, file_b, key_col, compare_cols, output_file):
"""
对比两个Excel的差异
:param key_col: 主键列号(用于匹配同一行记录)
:param compare_cols: 要比较的列号列表
"""
wb_a = load_workbook(file_a)
wb_b = load_workbook(file_b)
ws_a = wb_a.active
ws_b = wb_b.active
# 构建字典以便对比
def build_map(ws, key_col):
data_map = {}
for row in ws.iter_rows(min_row=2, values_only=True):
key = str(row[key_col - 1])
data_map[key] = row
return data_map
map_a = build_map(ws_a, key_col)
map_b = build_map(ws_b, key_col)
keys_all = set(map_a.keys()) | set(map_b.keys())
# 新建差异报告
from openpyxl import Workbook
wb_out = Workbook()
ws_out = wb_out.active
ws_out.title = '差异报告'
# 写入表头
headers = ['主键', '状态'] + [f'列{c}' for c in compare_cols]
ws_out.append(headers)
only_in_a = 0
only_in_b = 0
diff_count = 0
for key in sorted(keys_all):
if key not in map_a:
row_data = ['仅在新表中', key] + [map_b[key][c-1] if c <= len(map_b[key]) else '' for c in compare_cols]
ws_out.append(row_data)
for col in range(1, len(row_data)+1):
ws_out.cell(row=ws_out.max_row, column=col).fill = RED
only_in_b += 1
elif key not in map_b:
row_data = ['仅在旧表中', key] + [map_a[key][c-1] if c <= len(map_a[key]) else '' for c in compare_cols]
ws_out.append(row_data)
for col in range(1, len(row_data)+1):
ws_out.cell(row=ws_out.max_row, column=col).fill = YELLOW
only_in_a += 1
else:
differences = []
for c in compare_cols:
val_a = map_a[key][c-1] if c < len(map_a[key]) else ''
val_b = map_b[key][c-1] if c < len(map_b[key]) else ''
if val_a != val_b:
differences.append(f'{val_a}→{val_b}')
if differences:
row_data = ['有变化', key] + differences
ws_out.append(row_data)
ws_out.cell(row=ws_out.max_row, column=1).fill = GREEN
diff_count += 1
wb_out.save(output_file)
print(f'📊 对比完成!')
print(f' 🟡 仅在旧表: {only_in_a} 条')
print(f' 🔴 仅在新表: {only_in_b} 条')
print(f' 🟢 有变化: {diff_count} 条')
print(f' 📄 报告已保存到: {output_file}')
# 使用示例:对比本月和上月工资表
compare_excels(
file_a='工资表-上月.xlsx',
file_b='工资表-本月.xlsx',
key_col=1, # 以姓名为主键
compare_cols=[4], # 对比薪资列
output_file='工资变动报告.xlsx'
)输出结果一目了然,红色新增、黄色删除、绿色变化。
场景六:批量修改格式(一次性搞定所有丑表格)
from openpyxl import load_workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
def beautify_excel(file_path, output_path=None):
"""一键美化Excel表格"""
if output_path is None:
output_path = file_path
wb = load_workbook(file_path)
ws = wb.active
# 定义样式
title_font = Font(bold=True, size=14, color='1F4E79')
header_font = Font(bold=True, color='FFFFFF', size=11)
header_fill = PatternFill(start_color='2E75B6', end_color='2E75B6', fill_type='solid')
border = Border(
left=Side(style='thin', color='D9D9D9'),
right=Side(style='thin', color='D9D9D9'),
top=Side(style='thin', color='D9D9D9'),
bottom=Side(style='thin', color='D9D9D9')
)
center = Alignment(horizontal='center', vertical='center', wrap_text=True)
# 处理每一行
for row_idx, row in enumerate(ws.iter_rows(), start=1):
for col_idx, cell in enumerate(row, start=1):
cell.border = border
if row_idx == 1: # 表头
cell.font = header_font
cell.fill = header_fill
cell.alignment = center
else: # 数据区
cell.alignment = Alignment(vertical='center')
# 自动识别数字格式
if isinstance(cell.value, (int, float)):
cell.number_format = '#,##0.00' if isinstance(cell.value, float) else '#,##0'
cell.alignment = Alignment(horizontal='right', vertical='center')
# 隔行变色
if row_idx % 2 == 0:
cell.fill = PatternFill(start_color='F2F2F2', end_color='F2F2F2', fill_type='solid')
# 自适应列宽(简单版)
for col_cells in ws.columns:
max_length = 0
column = col_cells[0].column_letter
for cell in col_cells:
try:
if cell.value:
max_length = max(max_length, len(str(cell.value)))
except:
pass
ws.column_dimensions[column].width = min(max_length + 3, 40)
# 冻结首行
ws.freeze_panes = 'A2'
# 设置打印区域和页面方向
ws.print_title_rows = '1:1' # 每页重复打印表头
wb.save(output_path)
print(f'✅ 美化完成: {output_path}')
# 批量美化目录下所有xlsx
import glob
for f in glob.glob('待处理/*.xlsx'):
beautify_excel(f, f'已美化/{f.split("/")[-1]}')高级技巧速查卡
图表生成
from openpyxl.chart import BarChart, Reference
# 基于已有数据创建柱状图
chart = BarChart()
chart.type = "col"
chart.grouping = "clustered"
chart.title = "各产品销售对比"
chart.y_axis.title = '销售额'
chart.x_axis.title = '产品'
data = Reference(ws, min_col=4, min_row=1, max_row=5, max_col=4)
categories = Reference(ws, min_col=1, min_row=2, max_row=5)
chart.add_data(data, titles_from_data=True)
chart.set_categories(categories)
chart.shape = 4
ws.add_chart(chart, "F2")数据验证(下拉框)
from openpyxl.worksheet.datavalidation import DataValidation
# 创建下拉选项
dv = DataValidation(
type="list",
formula1='"研发部,产品部,市场部,财务部,人事部"',
allow_blank=True
)
dv.error = '请从下拉列表中选择部门'
dv.errorTitle = '无效输入'
# 应用到某列
dv.add('C2:C100')
ws.add_data_validation(dv)条件格式
from openpyxl.formatting.rule import CellIsRule
from openpyxl.styles import PatternFill
# 大于10000的单元格标红
red_fill = PatternFill(start_color='FF0000', end_color='FF0000', fill_type='solid')
rule = CellIsRule(operator='greaterThan', formula=['10000'], fill=red_fill)
ws.conditional_formatting.add('D2:D100', rule)性能优化:大数据量怎么办?
当数据量超过几万行时,openpyxl 默认模式会比较慢。这时候要用只写模式:
# 只写模式(超快!适合大批量导出)
from openpyxl import Workbook
wb = Workbook(write_only=True)
ws = wb.create_sheet()
# 写入10万行数据测试
for i in range(100000):
ws.append([f'数据{i}', i * 100, '分类A' if i % 2 == 0 else '分类B'])
wb.save('大量数据.xlsx') # 秒级完成| 数据量 | 普通模式 | 只写模式 |
|---|---|---|
| 1000行 | <1秒 | <1秒 |
| 1万行 | ~5秒 | ~1秒 |
| 10万行 | ~60秒 | ~3秒 |
| 50万行 | 内存爆炸 | ~15秒 |
write_only=True,读大文件用 read_only=True。总结
| 场景 | 核心函数 | 代码量 |
|---|---|---|
| 读取数据 | load_workbook + iter_rows | ~10行 |
| 创建报表 | Workbook + 样式设置 | ~40行 |
| 合并多表 | glob + 循环 append | ~25行 |
| 拆分大表 | defaultdict 分组 | ~20行 |
| 对比差异 | 字典比对 + 颜色标记 | ~45行 |
| 批量美化 | 遍历单元格设样式 | ~30行 |
说到底,程序员的核心竞争力不是手速快,而是能把重复劳动自动化。
下次再有人让你"帮忙弄一下Excel",别傻乎乎地打开鼠标点来点去了。花10分钟写个脚本,以后同类需求一键搞定,省下的时间去摸鱼不香吗?
觉得有用?点赞 👍 在看 🔥 转发 ↗️ 支持一下!关注「几行代码」,每天学点实用的编程技巧 💪
夜雨聆风