# encoding=utf8 import pymysql import pandas as pd import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt from io import BytesIO from datetime import datetime, date from datetime import timedelta import xlsxwriter def get_data(): mysql_host = '47.95.217.180' mysql_port = 3306 mysql_user = 'rtech' mysql_password = 'S5RE4dQ65MphYk7F' mysql_db = 'fmp' db = pymysql.connect(host=mysql_host,port=mysql_port,user=mysql_user, password=mysql_password,db=mysql_db,charset = 'utf8') print("数据库连接成功") df = pd.read_sql(sql,con=db) db.close mpl.rcParams["font.sans-serif"] = ["SimHei"] mpl.rcParams["axes.unicode_minus"] = False df['enterprise_id'] = 'shihuibeijing' temp_actula_price = df['flow_amount'] df['status'] = df['status'].str.replace('1','等待充值') df['status'] = df['status'].str.replace('2','充值中') df['status'] = df['status'].str.replace('4','充值失败') df['status'] = df['status'].str.replace('6','充值成功') temp_callback_status = df['status'] df['channel_id'] = df['channel_id'].str.replace('104','亚杉移动网厅') df['channel_id'] = df['channel_id'].str.replace('92','人工充值移动') df['channel_id'] = df['channel_id'].str.replace('93','人工充值联通') df['channel_id'] = df['channel_id'].str.replace('94','人工充值电信') df['channel_id'] = df['channel_id'].str.replace('89','兆蓉移动') df['channel_id'] = df['channel_id'].str.replace('90','兆蓉联通') df['channel_id'] = df['channel_id'].str.replace('91','兆蓉电信') df['channel_id'] = df['channel_id'].str.replace('95','兆蓉移动网厅') df['channel_id'] = df['channel_id'].str.replace('116','兆蓉联通网厅') df['channel_id'] = df['channel_id'].str.replace('105','兆蓉电信网厅') df['channel_id'] = df['channel_id'].str.replace('83','满帆移动网厅') df['channel_id'] = df['channel_id'].str.replace('84','满帆联通网厅') df['channel_id'] = df['channel_id'].str.replace('85','满帆电信网厅') df['channel_id'] = df['channel_id'].str.replace('96','自动补单移动通道') df['channel_id'] = df['channel_id'].str.replace('97','自动补单联通通道') df['channel_id'] = df['channel_id'].str.replace('98','自动补单电信通道') df['channel_id'] = df['channel_id'].str.replace('113','自动补单移动通道2') df['channel_id'] = df['channel_id'].str.replace('114','自动补单联通通道2') df['channel_id'] = df['channel_id'].str.replace('115','自动补单电信通道2') province = [] #省份 provider = [] #运营商 for area in df['mobile_home']: province.append(area.split('-',2)[0]) provider.append(area.split('-',2)[2]) #print(area) ''' col_name = df.columns.tolist() col_name.insert(col_name.index('used_mobile'),'actual_price') col_name.insert(col_name.index('mobile_home')+1,'area') col_name.insert(col_name.index('status')+1,'callback_status') df.reindex(columns=col_name) ''' df = df.reindex(columns=['order_id','enterprise_id','flow_amount','actual_price','used_mobile','mobile_home','area', 'apply_date','check_time','status','use_time','callback_status','channel_id']) df['order_id'] = df['order_id'].apply(str) df['actual_price'] = temp_actula_price df['callback_status'] = temp_callback_status df['area'] = pd.Series(province) df['mobile_home'] = pd.Series(provider) #print(df['mobile_home']) provider_order = df['mobile_home'].value_counts() #运营商订单数 provider_money = df.groupby('mobile_home').agg({'flow_amount':'sum'})#运行商面额 periods = pd.PeriodIndex(df['apply_date'],freq='D') df['datetime'] = periods every_money = df.groupby('datetime').agg({'flow_amount':'sum'}) #每日充值面额 #print(provider_money) #print(every_money['flow_amount'].values) pay_status = df['status'].value_counts() #充值状态占比 pay_price = df['flow_amount'].value_counts()#充值面额占比 pay_area = df['area'].value_counts().sort_values(ascending=False)#充值地域占比 #print(pay_area) pay_time = [] for i in df['use_time'].values: if i/60 < 10: pay_time.append('10分钟以内') elif 10 <= i/60 < 20: pay_time.append('10-20分钟') elif 20 <= i/60 < 30: pay_time.append('20-30分钟') elif 30 <= i/60 < 40: pay_time.append('30-40分钟') elif 40 <= i/60 < 50: pay_time.append('40-50分钟') elif 50 <= i/60 <= 60: pay_time.append('50-60分钟') else : pay_time.append('60分钟以上') use_time_count = pd.value_counts(pay_time).sort_index()#充值时长占比 #print(use_time_count) df.drop(['datetime'],axis=1,inplace=True) df.columns = ['订单号','应用id','充值面额','实付金额','手机号','运营商','归属地','发送日期','回调日期', '订单状态','用时(秒)','回调状态','通道名称'] #UPLOAD_FOLDER = './count_data/北京十荟每月数据统计.xlsx' #print(222) #UPLOAD_FOLDER = './北京十荟每月数据统计.csv' UPLOAD_FOLDER = './北京十荟{}月数据统计.xlsx'.format(pre_month.month) print(111) writer = pd.ExcelWriter(UPLOAD_FOLDER, engine='xlsxwriter') df.to_excel(writer,sheet_name=u'数据统计',index = False,encoding='utf_8_sig') sheet2 = writer.book.add_worksheet(u'图表统计') #绘制本月GMV折线图 fig = plt.figure(figsize=(10,5),dpi=200) x = range(1,pre_month.day+1) y = every_money['flow_amount'].values plt.plot(x,y) _xtick_labels = ['{}月{}日'.format(pre_month.month,i) for i in range(1,pre_month.day+1)] plt.xticks(x,_xtick_labels,rotation=45) plt.yticks(list(y)[::6],list(y)[::6]) plt.xlabel('时间') plt.ylabel('金额 单位(元)') plt.grid(alpha=0.1)#绘制网格 #plt.legend(loc='upper left') #图例 plt.title('本月GMV') plt.savefig('every_money.png') #绘制运营商订单占比饼图 fig = plt.figure(figsize=(2.5,2.5),dpi=180) labels = list(provider_order.index) #print(labels) #imagedata=BytesIO() colors = ['#1E90FF','#008000','#FFA500'] plt.pie(provider_order,labels=labels,colors=colors,autopct='%1.0f%%', shadow=False, startangle=90) plt.xticks(provider_order) plt.axis('equal') #plt.legend(loc=2) plt.title('运营商订单比例') plt.savefig('provider_order.png') #绘制充值时长柱状图 fig = plt.figure(figsize=(6,6),dpi=180) a = list(use_time_count.index) b = list(use_time_count.values) plt.bar(range(len(a)),b,width=0.6) plt.xticks(range(len(a)),a,rotation=45) plt.grid(alpha=0.1) plt.title('充值时长占比') plt.savefig('use_time.png') #绘制运营商成交面额占比饼图 fig = plt.figure(figsize=(2.5,2.5),dpi=180) labels_money = list(provider_money.index) #print(labels_money) colors = ['#1E90FF','#008000','#FFA500'] plt.pie(provider_money['flow_amount'].values,labels=labels_money,colors=colors,autopct='%1.2f%%', shadow=False,startangle=90) plt.xticks(provider_money['flow_amount'].values) plt.axis('equal') #plt.legend() plt.title('运营商成交面额比例') plt.savefig('provider_money.png') #绘制充值状态占比饼图 fig = plt.figure(figsize=(5,3.2),dpi=180) labels = list(pay_status.index) explode = (0.3,0,0) plt.pie(pay_status.values,explode =explode,labels=labels,autopct='%1.2f%%', shadow=False,startangle=0,pctdistance = 0.6,labeldistance=1.2) plt.xticks(pay_status.values) plt.axis('equal') plt.title('充值状态占比\n') plt.savefig('pay_status.png') #绘制充值面额占比饼图 fig = plt.figure(figsize=(5,3.2),dpi=180) _labels = list(pay_price.index) labels = [int(_labels[i]) for i in range(len(_labels))] explode = (0,0,0,0,0.1,0.3) plt.pie(pay_price.values,explode =explode,labels=labels,autopct='%1.2f%%', shadow=False,startangle=90,pctdistance = 0.6,labeldistance=1.1) plt.xticks(pay_price.values) plt.axis('equal') plt.title('充值面额占比\n') plt.savefig('pay_price.png') #绘制充值地域柱状图 fig = plt.figure(figsize=(14,8),dpi=180) a = list(pay_area.index) b = list(pay_area.values) plt.barh(range(len(a)),b,height=0.6,color='orange') #plt.xticks(b[::2],b[::2]) plt.yticks(range(len(a)),a) plt.grid(alpha=0.1) plt.title('地域分布') plt.savefig('area.png') sheet2.insert_image(0,0,'every_money.png',{'x_offset': 5, 'y_offset': 5}) sheet2.insert_image(38,0,'provider_order.png',{'x_offset': 5, 'y_offset': 5}) sheet2.insert_image(28,5,'use_time.png',{'x_offset': 5, 'y_offset': 5}) sheet2.insert_image(38,14,'provider_money.png',{'x_offset': 5, 'y_offset': 5}) sheet2.insert_image(63,0,'pay_status.png',{'x_offset': 10, 'y_offset': 5}) sheet2.insert_image(63,10,'pay_price.png',{'x_offset': 5, 'y_offset': 5}) sheet2.insert_image(84,0,'area.png',{'x_offset': 0, 'y_offset': 3}) print(33) writer.save() writer.close() #df.to_csv(UPLOAD_FOLDER,index=False,encoding='utf-8-sig') if __name__ == '__main__': today = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0) delta = timedelta(days=1) #求该月第一天 first_day = date(today.year, today.month, 1) #print(u'该月第一天:' + str(first_day)) #前一个月最后一天 pre_month = first_day - delta #timedelta是一个不错的函数 #print(u'前一个月最后一天:' + str(pre_month)) #前一个月的第一天 first_day_of_pre_month = date(pre_month.year, pre_month.month, 1) #print(first_day_of_pre_month) time_start = str(first_day_of_pre_month) + ' 00:00:00' time_end = str(pre_month) + ' 23:59:59' #print(time_start) #print(time_end) start_year = today.year end_week = int(today.strftime("%U")) + 1 print(end_week) end_sql = ''' SELECT order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,check_time,status, (UNIX_TIMESTAMP(check_time)- UNIX_TIMESTAMP(apply_date)) AS 'use_time',channel_id FROM flow_order_info WHERE apply_date BETWEEN '{}' AND '{}' AND enterprise_id = 54 ORDER BY apply_date ''' temp_sql = ''' SELECT order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,check_time,status, (UNIX_TIMESTAMP(check_time)- UNIX_TIMESTAMP(apply_date)) AS 'use_time',channel_id FROM flow_order_info_2020{} WHERE apply_date BETWEEN '{}' AND '{}' AND enterprise_id = 54 ''' sql = '' for i in range(end_week-6,end_week+1): if i == end_week: sql += end_sql.format(time_start,time_end) else : sql += temp_sql.format(i,time_start,time_end) + 'UNION' #print(sql) get_data()