🚀 Python脚本实现数据处理与可视化
本脚本的核心目标是通过一系列数据库查询操作,获取服务端设备的相关信息,计算区域分布百分比和变异系数,并使用pyecharts
库进行数据可视化展示。
🚀 快速开始
功能概述
脚本主要完成以下几个步骤:
- 从
db_alpha
库中查询服务端设备的ID和所属组号。
- 针对每台服务端设备,在
db_beta
库中查询其区域分布情况。
- 处理区域为空的情况,从
db_alpha
库中补充缺失的区域信息。
- 合并两部分结果,计算区域分布百分比和变异系数。
- 使用
pyecharts
库绘制单台服务端设备和每个组的区域分布百分比图。
- 计算每个组的平均变异系数,并绘制柱状图。
代码实现
import pymysql
import pandas as pd
from scipy.stats import variation
from pyecharts.charts import Pie, Bar
from pyecharts import options as opts
def connect_db(host, user, password, database):
return pymysql.connect(host=host, user=user, password=password, database=database)
def execute_query(connection, query):
with connection.cursor() as cursor:
cursor.execute(query)
return cursor.fetchall()
alpha_conn = connect_db('host_alpha', 'user_alpha', 'password_alpha', 'db_alpha')
server_query = """
select ex.device_id as id, ex.group_no
from service_config_alpha ex
where ex.is_active = 1
and ex.status = 1
union all
select dv.id, dv.group_no
from device_config_beta dv
where dv.is_active = 1
and dv.status = 1
"""
servers = execute_query(alpha_conn, server_query)
alpha_conn.close()
for server in servers:
server_id = server[0]
beta_conn = connect_db('host_beta', 'user_beta', 'password_beta', 'db_beta')
beta_query = f"""
select loc.region,
count(1)
from connection_log cnt
left join device_location loc on loc.id = cnt.client_id
where server_id = {server_id}
group by loc.region
"""
beta_result = execute_query(beta_conn, beta_query)
null_client_ids = [row[0] for row in beta_result if row[0] is None]
alpha_conn = connect_db('host_alpha', 'user_alpha', 'password_alpha', 'db_alpha')
alpha_query = f"""
select loc.region,
count(1)
from device_location loc
where loc.id in ({','.join(map(str, null_client_ids))})
group by loc.region
"""
alpha_result = execute_query(alpha_conn, alpha_query)
alpha_conn.close()
combined_result = beta_result + alpha_result
total_count = sum([row[1] for row in combined_result])
percentages = [(row[0], row[1] / total_count) for row in combined_result]
counts = [row[1] for row in combined_result]
cv = variation(counts)
pie = (
Pie()
.add(
"区域分布",
[list(z) for z in zip([row[0] for row in percentages], [row[1] for row in percentages])],
radius=["30%", "75%"],
)
.set_global_opts(
title_opts=opts.TitleOpts(title=f"Server {server_id} 区域分布百分比"),
legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
toolbox_opts=opts.ToolboxOpts(is_show=True)
)
.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
)
pie.render(f"server_{server_id}_region_distribution.html")
beta_conn.close()
group_data = {}
for server in servers:
server_id = server[0]
group_no = server[1]
if group_no not in group_data:
group_data[group_no] = []
group_data[group_no].extend(combined_result)
group_cvs = {}
for group_no, data in group_data.items():
counts = [row[1] for row in data]
cv = variation(counts)
group_cvs[group_no] = cv
total_count = sum([row[1] for row in data])
percentages = [(row[0], row[1] / total_count) for row in data]
pie = (
Pie()
.add(
"区域分布",
[list(z) for z in zip([row[0] for row in percentages], [row[1] for row in percentages])],
radius=["30%", "75%"],
)
.set_global_opts(
title_opts=opts.TitleOpts(title=f"Group {group_no} 区域分布百分比"),
legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
toolbox_opts=opts.ToolboxOpts(is_show=True)
)
.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
)
pie.render(f"group_{group_no}_region_distribution.html")
bar = (
Bar()
.add_xaxis(list(group_cvs.keys()))
.add_yaxis("平均变异系数", list(group_cvs.values()))
.set_global_opts(
title_opts=opts.TitleOpts(title="每个组的平均变异系数"),
xaxis_opts=opts.AxisOpts(name="组号"),
yaxis_opts=opts.AxisOpts(name="平均变异系数"),
toolbox_opts=opts.ToolboxOpts(is_show=True)
)
)
bar.render("group_avg_cv.html")
注意事项
- 请根据实际情况修改数据库连接信息(主机名、用户名、密码等)。
- 确保
pymysql
和pyecharts
库已正确安装。
- 代码中的SQL语句可能需要根据实际数据库表结构进行调整。