223 lines
6.6 KiB
Python
223 lines
6.6 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
生成监狱测试数据
|
|||
|
|
直接生成符合系统要求的Excel文件
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import pandas as pd
|
|||
|
|
import random
|
|||
|
|
from datetime import datetime, timedelta
|
|||
|
|
|
|||
|
|
# ===== Option values accepted by the target system =====

# Class (study group) options
CLASS_OPTIONS = [
    '中级班1班',
    '初级班1班',
    '高级班1班',
    '攻坚转换班1班',
    '攻坚转换班2班',
]

# Education level options
EDUCATION_OPTIONS = [
    '文盲',
    '小学',
    '初中',
    '高中',
    '中专',
    '大专',
    '本科',
    '研究生',
]

# Sex options
SEX_OPTIONS = [
    '男',
    '女',
]

# Status options
STATUS_OPTIONS = [
    '在押',
    '释放',
    '外出',
    '假释',
]

# Prison area (ward) options
PRISON_AREA_OPTIONS = [
    '一监区',
    '二监区',
    '三监区',
    '四监区',
    '五监区',
    '六监区',
    '七监区',
    '八监区',
]

# Ethnicity options
ETHNICITY_OPTIONS = [
    '汉族',
    '回族',
    '维吾尔族',
    '壮族',
    '满族',
    '彝族',
    '土家族',
    '藏族',
    '苗族',
    '蒙古族',
]

# Criminal charge options
CRIME_OPTIONS = [
    '盗窃罪',
    '抢劫罪',
    '诈骗罪',
    '故意伤害罪',
    '寻衅滋事罪',
    '交通肇事罪',
    '贩卖毒品罪',
    '非法持有毒品罪',
    '走私罪',
    '受贿罪',
    '挪用公款罪',
    '贪污罪',
    '强奸罪',
    '故意杀人罪',
]

# Family names used when composing random full names
SURNAMES = [
    '张', '王', '李', '赵', '陈',
    '刘', '杨', '黄', '周', '吴',
    '徐', '孙', '马', '朱', '胡',
    '郭', '何', '高', '林', '罗',
    '钱', '韩', '田', '方', '石',
    '姚', '谭', '廖', '邹', '熊',
    '冯', '于', '董', '袁', '蔡',
    '余', '杜', '叶', '程', '魏',
]
|
|||
|
|
|
|||
|
|
# Given-name fragments used when composing random full names.
# Every entry appears exactly once so that random.choice() draws each
# fragment with equal probability (the list previously contained '涛'
# twice, which made it twice as likely as any other entry).
GIVEN_NAMES = [
    '伟', '芳', '娜', '秀英', '敏', '静', '丽', '强', '磊', '军',
    '洋', '勇', '艳', '杰', '涛', '明', '超', '鹏', '辉', '华',
    '刚', '平', '波', '文', '玲', '霞', '红', '燕', '飞', '龙',
    '建', '国', '东', '斌', '浩', '宇', '天', '凯', '鑫',
]
|
|||
|
|
|
|||
|
|
def generate_name():
    """Build a random Chinese full name.

    The family name is drawn from ``SURNAMES``. The given name is made of
    two entries from ``GIVEN_NAMES`` roughly 70% of the time and of a
    single entry otherwise.

    Returns:
        The family name immediately followed by the given name.
    """
    family = random.choice(SURNAMES)
    # Decide the given-name length before drawing its parts so the order of
    # random draws stays: family name, length decision, given-name parts.
    part_count = 2 if random.random() < 0.7 else 1
    given = ''.join(random.choice(GIVEN_NAMES) for _ in range(part_count))
    return family + given
|
|||
|
|
|
|||
|
|
def generate_random_date(start_year=2020, end_year=2024):
    """Pick a random day between Jan 1 of start_year and Dec 31 of end_year.

    Args:
        start_year: First calendar year of the range.
        end_year: Last calendar year of the range.

    Returns:
        A ``datetime`` at midnight of the chosen day. Both boundary days
        can be returned.
    """
    first_day = datetime(start_year, 1, 1)
    span_in_days = (datetime(end_year, 12, 31) - first_day).days
    # randint() is inclusive on both ends, so the final day is reachable.
    return first_day + timedelta(days=random.randint(0, span_in_days))
|
|||
|
|
|
|||
|
|
def _add_calendar_months(moment, months):
    """Return ``moment`` moved forward by a whole number of calendar months.

    The day of month is kept when it exists in the target month; otherwise
    it is clamped to that month's last day (e.g. Jan 31 + 1 month gives the
    last day of February).
    """
    import calendar  # local import: the file's import block is left untouched

    month_index = moment.month - 1 + months
    year = moment.year + month_index // 12
    month = month_index % 12 + 1
    last_day = calendar.monthrange(year, month)[1]
    return moment.replace(year=year, month=month, day=min(moment.day, last_day))


def generate_sentence_dates():
    """Generate a consistent sentence start date, end date and length.

    The start date is a random day from 2020 through 2024 and the length
    is drawn from a fixed list of month counts. The end date is the start
    date advanced by that many calendar months, so a 12-month sentence
    ends on the same calendar day one year later. The earlier
    ``months * 30 days`` approximation ended several days to weeks early.

    Returns:
        A tuple ``(start_date, end_date, sentence_months)`` where both
        dates are ``datetime`` objects and ``sentence_months`` is an int.
    """
    # Sentence start date (also used by callers as the admission date).
    start_date = generate_random_date(2020, 2024)

    # Sentence length in months.
    sentence_months = random.choice([12, 18, 24, 36, 48, 60, 72, 84, 96, 120])

    # Sentence end date, computed with real calendar months.
    end_date = _add_calendar_months(start_date, sentence_months)

    return start_date, end_date, sentence_months
|
|||
|
|
|
|||
|
|
def generate_test_data(count=100, start_id=201):
    """Generate random inmate test records.

    Progress is printed to stdout after every 10 generated records.

    Args:
        count: Number of records to generate. With ``0`` the result is an
            empty DataFrame that still carries the full column schema.
        start_id: Value of the '信息编号' column for the first record;
            later records count up from it by one.

    Returns:
        A pandas DataFrame with one row per record and the columns listed
        in ``columns`` below, in that order.
    """
    # Explicit schema: guarantees the column set and order even when no
    # rows are generated. Must be kept in sync with the keys of ``record``.
    columns = [
        '信息编号', '罪犯姓名', '监狱', '监区', '班级', '性别', '民族',
        '文化程度', '罪名', '刑期', '刑期起日', '刑期止日', '入监时间', '状态',
    ]

    print(f"开始生成 {count} 条测试数据...")

    data = []

    for i in range(count):
        # Sentence dates come first so start, end and length stay consistent.
        sentence_start, sentence_end, sentence_months = generate_sentence_dates()

        # The admission date is the same day as the sentence start date.
        start_text = sentence_start.strftime('%Y-%m-%d')

        record = {
            '信息编号': start_id + i,
            '罪犯姓名': generate_name(),
            '监狱': '第一监狱',
            '监区': random.choice(PRISON_AREA_OPTIONS),
            '班级': random.choice(CLASS_OPTIONS),
            '性别': random.choice(SEX_OPTIONS),
            '民族': random.choice(ETHNICITY_OPTIONS),
            '文化程度': random.choice(EDUCATION_OPTIONS),
            '罪名': random.choice(CRIME_OPTIONS),
            '刑期': sentence_months,
            '刑期起日': start_text,
            '刑期止日': sentence_end.strftime('%Y-%m-%d'),
            '入监时间': start_text,
            '状态': random.choice(STATUS_OPTIONS),
        }

        data.append(record)

        # Report progress every 10 records.
        if (i + 1) % 10 == 0:
            print(f" 已生成 {i + 1}/{count} 条")

    return pd.DataFrame(data, columns=columns)
|
|||
|
|
|
|||
|
|
def print_statistics(df):
    """Print a summary of the generated records to stdout.

    Prints the total row count, then for each of the class, education,
    sex, status and prison-area columns the number of rows per distinct
    value, in the order produced by ``value_counts()``.

    Args:
        df: DataFrame containing at least the columns '班级', '文化程度',
            '性别', '状态' and '监区'.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("📊 生成数据统计")
    print(banner)

    print(f"\n✅ 总记录数: {len(df)} 条")

    # (column to summarise, heading printed above its distribution)
    sections = (
        ('班级', "\n✅ 班级分布:"),
        ('文化程度', "\n✅ 文化程度分布:"),
        ('性别', "\n✅ 性别分布:"),
        ('状态', "\n✅ 状态分布:"),
        ('监区', "\n✅ 监区分布:"),
    )
    for column, heading in sections:
        print(heading)
        for value, total in df[column].value_counts().items():
            print(f" {value}: {total}人")
|
|||
|
|
|
|||
|
|
def main(count=3200, start_id=201, output_file='test_data.xlsx'):
    """Generate the test data, print a summary and write it to an Excel file.

    Called without arguments it behaves as before: 3200 records, numbered
    from 201, written to 'test_data.xlsx' in the current directory.

    Args:
        count: Number of records to generate.
        start_id: Information number assigned to the first record.
        output_file: Path of the .xlsx file to write (via openpyxl).
    """
    banner = "=" * 60

    print(banner)
    print("🔧 监狱测试数据生成工具")
    print(banner)

    print(f"配置: 生成 {count} 条数据,起始编号 {start_id}")
    print(f"输出文件: {output_file}")
    print(banner + "\n")

    # Generate the records.
    df = generate_test_data(count, start_id)

    # Print the per-column distributions.
    print_statistics(df)

    # Write the workbook.
    print("\n" + banner)
    print(f"💾 保存到文件: {output_file}")
    df.to_excel(output_file, index=False, engine='openpyxl')

    print("✅ 生成完成!")
    print(f"📁 文件位置: {output_file}")
    print(f"📝 数据行数: {len(df)}")
    print(f"📋 列数: {len(df.columns)}")

    print("\n前5行数据预览:")
    print(df.head().to_string())

    print("\n" + banner)
    print("📖 使用说明")
    print(banner)
    # Report the file that was actually written rather than a fixed name.
    print(f"1. ✅ 使用生成的文件: {output_file}")
    print("2. ✅ 在系统中导入该文件")
    print("3. ✅ 所有字段均为系统可接受的值")
    print("4. ✅ 数据完全随机生成,可重复运行")
    print("\n可用班级:")
    for cls in CLASS_OPTIONS:
        print(f" - {cls}")
    print(banner)
|
|||
|
|
|
|||
|
|
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|