import matplotlib.pyplot as plt
import pandas as pd


# Path of the pipe-delimited nginx access log to analyse.
logfile = 'G:/tmp/access_nginx_2018-11-20.log/access_nginx_2018-11-20a.log'
# Column names assigned, in order, to the '|'-separated fields of each log line.
# Only 'host' is used below; the nuNN names look like placeholders for fields
# this script ignores (assumption - confirm against the nginx log_format).
logfile_format = [
    'host', 'server_addr', 'remote_addr', 'request_time', 'nu11',
    'remote_user', 'msec', 'time_local', 'request_method', 'request_uri',
    'status', 'body_bytes_sent', 'http_referer', 'http_user_agent',
    'http_x_forwarded_for', 'nu01', 'nu02', 'nu03', 'nu04', 'nu05', 'nu06',
    'request_body', 'nu07', 'nu08', 'nu09', 'nu10',
]
# Read the log fields into a DataFrame.
_read_options = dict(
    sep='|',
    engine='python',
    names=logfile_format,
    header=None,
    encoding='utf-8',
    index_col=False,
)
try:
    # pandas >= 1.3: malformed lines are dropped with a warning, which is what
    # error_bad_lines=False (with the default warn_bad_lines) used to do.
    reader = pd.read_table(logfile, on_bad_lines='warn', **_read_options)
except TypeError:
    # pandas < 1.3 does not know on_bad_lines; use the legacy keyword, which
    # was deprecated in 1.3 and removed in 2.0.
    reader = pd.read_table(logfile, error_bad_lines=False, **_read_options)
# Request count per host: keep the ten most frequent hosts, then sort them
# ascending by count.
df_groupd_max = reader['host'].value_counts().head(10).sort_values()
#df_ana=pd.concat([df_groupd_max,df_groupd_min,df_groupd_mean,df_groupd_size],axis=1,keys=["max","min","average","count"])
print("output excel")
#df_groupd_max.to_excel("test.xls")


# Horizontal bar chart of the request count per host.
# Bar positions follow the real number of hosts: value_counts().head(10) yields
# fewer than ten rows when the log has fewer than ten distinct hosts, and a
# fixed range(10) would then no longer match the number of values and labels.
bar_positions = range(len(df_groupd_max))
plt.barh(bar_positions, df_groupd_max.values, height=0.7, color='steelblue', alpha=0.8)
plt.yticks(bar_positions, df_groupd_max.index)
#plt.xlim(500,400)
plt.xlabel("mun")
plt.title("domain")
# Write each count next to the end of its bar (small offsets keep the text
# clear of the bar edge and roughly centred vertically).
for position, count in zip(bar_positions, df_groupd_max.values):
    plt.text(count + 0.2, position - 0.1, str(count))
plt.show()
# Document last updated: 2019-06-14 13:20   Author: 月影鹏鹏