123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299 |
- # encoding=utf8
- import pymysql
- import pandas as pd
- import numpy as np
- import matplotlib as mpl
- import matplotlib.pyplot as plt
- from io import BytesIO
- from datetime import datetime, date
- from datetime import timedelta
- import xlsxwriter
- def get_data():
- mysql_host = '47.95.217.180'
- mysql_port = 3306
- mysql_user = 'rtech'
- mysql_password = 'S5RE4dQ65MphYk7F'
- mysql_db = 'fmp'
- db = pymysql.connect(host=mysql_host,port=mysql_port,user=mysql_user, password=mysql_password,db=mysql_db,charset = 'utf8')
- print("数据库连接成功")
- df = pd.read_sql(sql,con=db)
- db.close
-
- mpl.rcParams["font.sans-serif"] = ["SimHei"]
- mpl.rcParams["axes.unicode_minus"] = False
-
- df['enterprise_id'] = 'shihuibeijing'
- temp_actula_price = df['flow_amount']
-
-
- df['status'] = df['status'].str.replace('1','等待充值')
- df['status'] = df['status'].str.replace('2','充值中')
- df['status'] = df['status'].str.replace('4','充值失败')
- df['status'] = df['status'].str.replace('6','充值成功')
-
- temp_callback_status = df['status']
-
- df['channel_id'] = df['channel_id'].str.replace('104','亚杉移动网厅')
- df['channel_id'] = df['channel_id'].str.replace('92','人工充值移动')
- df['channel_id'] = df['channel_id'].str.replace('93','人工充值联通')
- df['channel_id'] = df['channel_id'].str.replace('94','人工充值电信')
- df['channel_id'] = df['channel_id'].str.replace('89','兆蓉移动')
- df['channel_id'] = df['channel_id'].str.replace('90','兆蓉联通')
- df['channel_id'] = df['channel_id'].str.replace('91','兆蓉电信')
- df['channel_id'] = df['channel_id'].str.replace('95','兆蓉移动网厅')
- df['channel_id'] = df['channel_id'].str.replace('116','兆蓉联通网厅')
- df['channel_id'] = df['channel_id'].str.replace('105','兆蓉电信网厅')
- df['channel_id'] = df['channel_id'].str.replace('83','满帆移动网厅')
- df['channel_id'] = df['channel_id'].str.replace('84','满帆联通网厅')
- df['channel_id'] = df['channel_id'].str.replace('85','满帆电信网厅')
- df['channel_id'] = df['channel_id'].str.replace('96','自动补单移动通道')
- df['channel_id'] = df['channel_id'].str.replace('97','自动补单联通通道')
- df['channel_id'] = df['channel_id'].str.replace('98','自动补单电信通道')
- df['channel_id'] = df['channel_id'].str.replace('113','自动补单移动通道2')
- df['channel_id'] = df['channel_id'].str.replace('114','自动补单联通通道2')
- df['channel_id'] = df['channel_id'].str.replace('115','自动补单电信通道2')
-
-
- province = [] #省份
- provider = [] #运营商
- for area in df['mobile_home']:
- province.append(area.split('-',2)[0])
- provider.append(area.split('-',2)[2])
- #print(area)
-
- '''
- col_name = df.columns.tolist()
- col_name.insert(col_name.index('used_mobile'),'actual_price')
- col_name.insert(col_name.index('mobile_home')+1,'area')
- col_name.insert(col_name.index('status')+1,'callback_status')
- df.reindex(columns=col_name)
- '''
-
- df = df.reindex(columns=['order_id','enterprise_id','flow_amount','actual_price','used_mobile','mobile_home','area',
- 'apply_date','check_time','status','use_time','callback_status','channel_id'])
- df['order_id'] = df['order_id'].apply(str)
- df['actual_price'] = temp_actula_price
- df['callback_status'] = temp_callback_status
- df['area'] = pd.Series(province)
- df['mobile_home'] = pd.Series(provider)
- #print(df['mobile_home'])
-
- provider_order = df['mobile_home'].value_counts() #运营商订单数
- provider_money = df.groupby('mobile_home').agg({'flow_amount':'sum'})#运行商面额
-
- periods = pd.PeriodIndex(df['apply_date'],freq='D')
- df['datetime'] = periods
- every_money = df.groupby('datetime').agg({'flow_amount':'sum'}) #每日充值面额
- #print(provider_money)
- #print(every_money['flow_amount'].values)
-
- pay_status = df['status'].value_counts() #充值状态占比
-
- pay_price = df['flow_amount'].value_counts()#充值面额占比
-
- pay_area = df['area'].value_counts().sort_values(ascending=False)#充值地域占比
- #print(pay_area)
-
-
-
- pay_time = []
- for i in df['use_time'].values:
- if i/60 < 10:
- pay_time.append('10分钟以内')
- elif 10 <= i/60 < 20:
- pay_time.append('10-20分钟')
- elif 20 <= i/60 < 30:
- pay_time.append('20-30分钟')
- elif 30 <= i/60 < 40:
- pay_time.append('30-40分钟')
- elif 40 <= i/60 < 50:
- pay_time.append('40-50分钟')
- elif 50 <= i/60 <= 60:
- pay_time.append('50-60分钟')
- else :
- pay_time.append('60分钟以上')
-
- use_time_count = pd.value_counts(pay_time).sort_index()#充值时长占比
-
- #print(use_time_count)
-
- df.drop(['datetime'],axis=1,inplace=True)
-
-
-
- df.columns = ['订单号','应用id','充值面额','实付金额','手机号','运营商','归属地','发送日期','回调日期',
- '订单状态','用时(秒)','回调状态','通道名称']
- #UPLOAD_FOLDER = './count_data/北京十荟每月数据统计.xlsx'
- #print(222)
- #UPLOAD_FOLDER = './北京十荟每月数据统计.csv'
- UPLOAD_FOLDER = './北京十荟{}月数据统计.xlsx'.format(pre_month.month)
- print(111)
-
-
-
- writer = pd.ExcelWriter(UPLOAD_FOLDER, engine='xlsxwriter')
- df.to_excel(writer,sheet_name=u'数据统计',index = False,encoding='utf_8_sig')
- sheet2 = writer.book.add_worksheet(u'图表统计')
-
- #绘制本月GMV折线图
- fig = plt.figure(figsize=(10,5),dpi=200)
- x = range(1,pre_month.day+1)
- y = every_money['flow_amount'].values
- plt.plot(x,y)
-
- _xtick_labels = ['{}月{}日'.format(pre_month.month,i) for i in range(1,pre_month.day+1)]
- plt.xticks(x,_xtick_labels,rotation=45)
- plt.yticks(list(y)[::6],list(y)[::6])
- plt.xlabel('时间')
- plt.ylabel('金额 单位(元)')
- plt.grid(alpha=0.1)#绘制网格
- #plt.legend(loc='upper left') #图例
- plt.title('本月GMV')
- plt.savefig('every_money.png')
-
-
- #绘制运营商订单占比饼图
- fig = plt.figure(figsize=(2.5,2.5),dpi=180)
- labels = list(provider_order.index)
- #print(labels)
- #imagedata=BytesIO()
- colors = ['#1E90FF','#008000','#FFA500']
- plt.pie(provider_order,labels=labels,colors=colors,autopct='%1.0f%%', shadow=False, startangle=90)
-
- plt.xticks(provider_order)
- plt.axis('equal')
- #plt.legend(loc=2)
- plt.title('运营商订单比例')
- plt.savefig('provider_order.png')
-
-
- #绘制充值时长柱状图
- fig = plt.figure(figsize=(6,6),dpi=180)
- a = list(use_time_count.index)
- b = list(use_time_count.values)
- plt.bar(range(len(a)),b,width=0.6)
- plt.xticks(range(len(a)),a,rotation=45)
- plt.grid(alpha=0.1)
- plt.title('充值时长占比')
- plt.savefig('use_time.png')
-
-
- #绘制运营商成交面额占比饼图
- fig = plt.figure(figsize=(2.5,2.5),dpi=180)
- labels_money = list(provider_money.index)
- #print(labels_money)
- colors = ['#1E90FF','#008000','#FFA500']
- plt.pie(provider_money['flow_amount'].values,labels=labels_money,colors=colors,autopct='%1.2f%%', shadow=False,startangle=90)
- plt.xticks(provider_money['flow_amount'].values)
- plt.axis('equal')
- #plt.legend()
- plt.title('运营商成交面额比例')
- plt.savefig('provider_money.png')
-
- #绘制充值状态占比饼图
- fig = plt.figure(figsize=(5,3.2),dpi=180)
- labels = list(pay_status.index)
- explode = (0.3,0,0)
- plt.pie(pay_status.values,explode =explode,labels=labels,autopct='%1.2f%%', shadow=False,startangle=0,pctdistance = 0.6,labeldistance=1.2)
- plt.xticks(pay_status.values)
- plt.axis('equal')
- plt.title('充值状态占比\n')
- plt.savefig('pay_status.png')
-
- #绘制充值面额占比饼图
- fig = plt.figure(figsize=(5,3.2),dpi=180)
- _labels = list(pay_price.index)
- labels = [int(_labels[i]) for i in range(len(_labels))]
- explode = (0,0,0,0,0.1,0.3)
- plt.pie(pay_price.values,explode =explode,labels=labels,autopct='%1.2f%%', shadow=False,startangle=90,pctdistance = 0.6,labeldistance=1.1)
- plt.xticks(pay_price.values)
- plt.axis('equal')
- plt.title('充值面额占比\n')
- plt.savefig('pay_price.png')
-
-
- #绘制充值地域柱状图
- fig = plt.figure(figsize=(14,8),dpi=180)
- a = list(pay_area.index)
- b = list(pay_area.values)
- plt.barh(range(len(a)),b,height=0.6,color='orange')
- #plt.xticks(b[::2],b[::2])
- plt.yticks(range(len(a)),a)
- plt.grid(alpha=0.1)
- plt.title('地域分布')
- plt.savefig('area.png')
-
- sheet2.insert_image(0,0,'every_money.png',{'x_offset': 5, 'y_offset': 5})
- sheet2.insert_image(38,0,'provider_order.png',{'x_offset': 5, 'y_offset': 5})
- sheet2.insert_image(28,5,'use_time.png',{'x_offset': 5, 'y_offset': 5})
- sheet2.insert_image(38,14,'provider_money.png',{'x_offset': 5, 'y_offset': 5})
- sheet2.insert_image(63,0,'pay_status.png',{'x_offset': 10, 'y_offset': 5})
- sheet2.insert_image(63,10,'pay_price.png',{'x_offset': 5, 'y_offset': 5})
- sheet2.insert_image(84,0,'area.png',{'x_offset': 0, 'y_offset': 3})
-
-
- print(33)
-
- writer.save()
- writer.close()
-
- #df.to_csv(UPLOAD_FOLDER,index=False,encoding='utf-8-sig')
-
-
- if __name__ == '__main__':
- today = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)
- delta = timedelta(days=1)
- #求该月第一天
- first_day = date(today.year, today.month, 1)
- #print(u'该月第一天:' + str(first_day))
- #前一个月最后一天
- pre_month = first_day - delta #timedelta是一个不错的函数
- #print(u'前一个月最后一天:' + str(pre_month))
- #前一个月的第一天
- first_day_of_pre_month = date(pre_month.year, pre_month.month, 1)
- #print(first_day_of_pre_month)
- time_start = str(first_day_of_pre_month) + ' 00:00:00'
- time_end = str(pre_month) + ' 23:59:59'
- #print(time_start)
- #print(time_end)
- start_year = today.year
- end_week = int(today.strftime("%U")) + 1
- print(end_week)
- end_sql = ''' SELECT order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,check_time,status,
- (UNIX_TIMESTAMP(check_time)- UNIX_TIMESTAMP(apply_date)) AS 'use_time',channel_id
- FROM
- flow_order_info
- WHERE
- apply_date BETWEEN '{}' AND '{}' AND enterprise_id = 54
- ORDER BY apply_date '''
- temp_sql = ''' SELECT order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,check_time,status,
- (UNIX_TIMESTAMP(check_time)- UNIX_TIMESTAMP(apply_date)) AS 'use_time',channel_id
- FROM
- flow_order_info_2020{}
- WHERE
- apply_date BETWEEN '{}' AND '{}' AND enterprise_id = 54 '''
- sql = ''
- for i in range(end_week-6,end_week+1):
- if i == end_week:
- sql += end_sql.format(time_start,time_end)
- else :
- sql += temp_sql.format(i,time_start,time_end) + 'UNION'
- #print(sql)
-
- get_data()
-
|