| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347 | # encoding=utf8import pymysqlimport pandas as pdimport numpy as npimport matplotlib as mplimport matplotlib.pyplot as pltfrom io import BytesIOfrom datetime import datetime, datefrom datetime import timedeltaimport  xlsxwriter  def get_data():  mysql_host = '47.95.217.180'  mysql_port = 3306  mysql_user = 'rtech'  mysql_password = 'S5RE4dQ65MphYk7F'  mysql_db = 'fmp'  db = pymysql.connect(host=mysql_host,port=mysql_port,user=mysql_user, password=mysql_password,db=mysql_db,charset = 'utf8')  print("数据库连接成功")  df = pd.read_sql(sql,con=db)  db.close    mpl.rcParams["font.sans-serif"] = ["SimHei"]  mpl.rcParams["axes.unicode_minus"] = False    df['enterprise_id'] = 'shihuibeijing'  temp_actula_price = df['flow_amount']     df['status'] = df['status'].str.replace('1','等待充值')  df['status'] = df['status'].str.replace('2','充值中')  df['status'] = df['status'].str.replace('4','充值失败')  df['status'] = df['status'].str.replace('6','充值成功')   temp_callback_status = df['status']   df['channel_id'] = df['channel_id'].str.replace('104','亚杉移动网厅')  df['channel_id'] = df['channel_id'].str.replace('117','亚杉电信网厅')  df['channel_id'] = df['channel_id'].str.replace('92','人工充值移动')  df['channel_id'] = df['channel_id'].str.replace('93','人工充值联通')  df['channel_id'] = df['channel_id'].str.replace('94','人工充值电信')  df['channel_id'] = df['channel_id'].str.replace('89','兆蓉移动')  df['channel_id'] = df['channel_id'].str.replace('90','兆蓉联通')  df['channel_id'] = df['channel_id'].str.replace('91','兆蓉电信')  df['channel_id'] = df['channel_id'].str.replace('95','兆蓉移动网厅')  df['channel_id'] = df['channel_id'].str.replace('116','兆蓉联通网厅')  df['channel_id'] = df['channel_id'].str.replace('105','兆蓉电信网厅')  df['channel_id'] = df['channel_id'].str.replace('83','满帆移动网厅')  df['channel_id'] = df['channel_id'].str.replace('84','满帆联通网厅')  df['channel_id'] = df['channel_id'].str.replace('85','满帆电信网厅')  df['channel_id'] = df['channel_id'].str.replace('96','自动补单移动通道')  df['channel_id'] = df['channel_id'].str.replace('97','自动补单联通通道')  df['channel_id'] = df['channel_id'].str.replace('98','自动补单电信通道')  df['channel_id'] = df['channel_id'].str.replace('113','自动补单移动通道2')  df['channel_id'] = df['channel_id'].str.replace('114','自动补单联通通道2')    df['channel_id'] = df['channel_id'].str.replace('115','自动补单电信通道2')  df['channel_id'] = df['channel_id'].str.replace('118','智信全国移动')  df['channel_id'] = df['channel_id'].str.replace('119','智信全国联通')  df['channel_id'] = df['channel_id'].str.replace('120','智信全国电信')  df['channel_id'] = df['channel_id'].str.replace('121','易迅捷移动网厅')  df['channel_id'] = df['channel_id'].str.replace('122','易迅捷电信网厅')  df['channel_id'] = df['channel_id'].str.replace('123','易迅捷联通网厅')  df['channel_id'] = df['channel_id'].str.replace('124','亚杉联通网厅')        province = [] #省份  provider = [] #运营商  for area in df['mobile_home']:      province.append(area.split('-',2)[0])      provider.append(area.split('-',2)[2])      #print(area)        '''  col_name = df.columns.tolist()  col_name.insert(col_name.index('used_mobile'),'actual_price')  col_name.insert(col_name.index('mobile_home')+1,'area')  col_name.insert(col_name.index('status')+1,'callback_status')  df.reindex(columns=col_name)  '''    df = df.reindex(columns=['order_id','enterprise_id','flow_amount','actual_price','used_mobile','mobile_home','area',                      'apply_date','check_time','status','use_time','callback_status','channel_id'])  df['order_id'] = df['order_id'].apply(str)  df['actual_price'] = temp_actula_price  df['callback_status'] = temp_callback_status  df['area'] = pd.Series(province)  df['mobile_home'] = pd.Series(provider)  #print(df['mobile_home'])    provider_order =  df['mobile_home'].value_counts() #运营商订单数  provider_money = df.groupby('mobile_home').agg({'flow_amount':'sum'})#运行商面额    periods = pd.PeriodIndex(df['apply_date'],freq='D')  df['datetime'] = periods  every_money = df.groupby('datetime').agg({'flow_amount':'sum'}) #每日充值面额  #print(provider_money)  #print(every_money['flow_amount'].values)    pay_status = df['status'].value_counts() #充值状态占比    pay_price = df.groupby('flow_amount').agg({'flow_amount':'sum'})#充值面额占比    pay_area = df['area'].value_counts().sort_values(ascending=False)#充值地域占比    #print(pay_price)       pay_time = []  for i in df['use_time'].values:    if i/60 < 10:        pay_time.append('0-10分钟')    elif 10 <= i/60 < 20:        pay_time.append('10-20分钟')    elif 20 <= i/60 < 30:        pay_time.append('20-30分钟')    elif 30 <= i/60 < 40:        pay_time.append('30-40分钟')    elif 40 <= i/60 < 50:        pay_time.append('40-50分钟')    elif 50 <= i/60 <= 60:        pay_time.append('50-60分钟')    else :        pay_time.append('60分钟以上')    use_time_count = pd.value_counts(pay_time).sort_index()#充值时长占比  #use_time_count = pd.value_counts(pay_time)#充值时长占比    #print(use_time_count)    df.drop(['datetime'],axis=1,inplace=True)         df.columns = ['订单号','应用id','充值面额','实付金额','手机号','运营商','归属地','发送日期','回调日期',                '订单状态','用时(秒)','回调状态','通道名称']  #UPLOAD_FOLDER = './count_data/北京十荟每月数据统计.xlsx'  #print(222)  #UPLOAD_FOLDER = './北京十荟每月数据统计.csv'  UPLOAD_FOLDER = './北京十荟{}月数据统计.xlsx'.format(pre_month.month)  print(111)       writer = pd.ExcelWriter(UPLOAD_FOLDER, engine='xlsxwriter')  df.to_excel(writer,sheet_name=u'数据统计',index = False,encoding='utf_8_sig')  workbook = writer.book  sheet2 = workbook.add_worksheet(u'图表统计')    #设置格式  style1 =  workbook.add_format({'fg_color':'#FFFFFF'}) #背景色  style2 =  workbook.add_format({'top':2,'fg_color':'#FFFFFF'}) #上边框  style3 =  workbook.add_format({'left':2,'fg_color':'#FFFFFF'}) #左边框  style4 =  workbook.add_format({'top':2,'left':2,'fg_color':'#FFFFFF'})   style5 =  workbook.add_format({'font_size':25,'bold':True,'align':'center','valign':'vcenter','fg_color':'#20B2AA'}) #  for row in range(150):      if(row<3):        sheet2.set_row(row,25,style1)      else:        sheet2.set_row(row,None,style1)          for col in range(1,22):      sheet2.write(1,col,'',style2)       sheet2.write(128,col,'',style2)         for row in range(1,128):      sheet2.write(row,1,'',style3)       sheet2.write(row,22,'',style3)   sheet2.write(1,1,'',style4)        sheet2.merge_range('B2:Y3', '数据月报--十荟团', style5)       #绘制本月GMV折线图  fig = plt.figure(figsize=(10,5),dpi=200)  x = range(1,pre_month.day+1)  y = every_money['flow_amount'].values  plt.plot(x,y)    _xtick_labels = ['{}月{}日'.format(pre_month.month,i) for i in range(1,pre_month.day+1)]  plt.xticks(x,_xtick_labels,rotation=45)  plt.yticks(list(y)[::6],list(y)[::6])  plt.xlabel('时间')  plt.ylabel('金额 单位(元)')   plt.grid(alpha=0.1)#绘制网格   #plt.legend(loc='upper left') #图例  plt.title('本月GMV')  plt.savefig('every_money.png')      #绘制运营商订单占比饼图  fig = plt.figure(figsize=(2.5,2.5),dpi=180)  labels = list(provider_order.index)  #print(labels)  #imagedata=BytesIO()  colors = ['#1E90FF','#008000','#FFA500']  plt.pie(provider_order,labels=labels,colors=colors,autopct='%1.0f%%', shadow=False, startangle=90)    plt.xticks(provider_order)  plt.axis('equal')  #plt.legend(loc=2)  plt.title('运营商订单比例')  plt.savefig('provider_order.png')      #绘制充值时长柱状图  fig = plt.figure(figsize=(6,6),dpi=180)  a = list(use_time_count.index)  b = list(use_time_count.values)  plt.bar(range(len(a)),b,width=0.6)  plt.xticks(range(len(a)),a,rotation=45)  for x, y in enumerate(b):    plt.text(x, y+500, "%.0f" %y, ha='center', va= 'bottom')  plt.xlabel('充值时长')  plt.ylabel('订单数量')  plt.grid(alpha=0.1)  plt.title('充值时长占比')  plt.savefig('use_time.png')      #绘制运营商成交面额占比饼图  fig = plt.figure(figsize=(2.5,2.5),dpi=180)  labels_money = list(provider_money.index)  #print(labels_money)  colors = ['#1E90FF','#008000','#FFA500']  plt.pie(provider_money['flow_amount'].values,labels=labels_money,colors=colors,autopct='%1.2f%%', shadow=False,startangle=90)   plt.xticks(provider_money['flow_amount'].values)  plt.axis('equal')  #plt.legend()  plt.title('运营商成交面额比例')  plt.savefig('provider_money.png')    #绘制充值状态占比饼图  fig = plt.figure(figsize=(5,3.2),dpi=180)  labels = list(pay_status.index)  #explode = (0.3,0,0)  #plt.pie(pay_status.values,explode =explode,labels=labels,autopct='%1.2f%%', shadow=False,startangle=0,pctdistance = 0.6,labeldistance=1.2)   plt.pie(pay_status.values,labels=labels,autopct='%1.2f%%', shadow=False,startangle=0,pctdistance = 0.6,labeldistance=1.2)   plt.xticks(pay_status.values)  plt.axis('equal')  plt.title('充值状态占比\n')  plt.savefig('pay_status.png')     #绘制充值面额占比饼图  fig = plt.figure(figsize=(5,3.2),dpi=180)  _labels = list(pay_price.index)  labels = [int(_labels[i])  for i in range(len(_labels))]  explode = (0.4,0,0,0,0.1,0.2)  plt.pie(pay_price['flow_amount'].values,explode =explode,labels=labels,autopct='%1.2f%%', shadow=False,startangle=90,pctdistance = 0.6,labeldistance=1.1)   plt.xticks(pay_price.values)  plt.axis('equal')  plt.title('充值面额占比\n')  plt.savefig('pay_price.png')       #绘制充值地域柱状图  fig = plt.figure(figsize=(12,8),dpi=180)  a = list(pay_area.index)  b = list(pay_area.values)  plt.barh(range(len(a)),b,height=0.6,color='orange')    #plt.xticks(b[::2],b[::2])  plt.yticks(range(len(a)),a)  for y, x in enumerate(b):    plt.text(x+1100, y-0.3, "%.0f" %x,ha='center', va= 'bottom')  plt.xlabel('订单数量')  plt.ylabel('省份')  plt.grid(alpha=0.1)  plt.title('地域分布')  plt.savefig('area.png')    sheet2.insert_image(5,3,'every_money.png',{'x_offset': 5, 'y_offset': 5})   sheet2.insert_image(45,3,'provider_order.png',{'x_offset': 5, 'y_offset': 5})   sheet2.insert_image(35,8,'use_time.png',{'x_offset': 5, 'y_offset': 5})   sheet2.insert_image(45,17,'provider_money.png',{'x_offset': 5, 'y_offset': 5})   sheet2.insert_image(69,3,'pay_status.png',{'x_offset': 10, 'y_offset': 5})   sheet2.insert_image(69,13,'pay_price.png',{'x_offset': 5, 'y_offset': 5})   sheet2.insert_image(88,3,'area.png',{'x_offset': 0, 'y_offset': 3})        print(33)    writer.save()  writer.close()    #df.to_csv(UPLOAD_FOLDER,index=False,encoding='utf-8-sig')     if __name__ == '__main__':    today = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)    delta = timedelta(days=1)    #求该月第一天    first_day = date(today.year, today.month, 1)    #print(u'该月第一天:' + str(first_day))    #前一个月最后一天    pre_month = first_day - delta #timedelta是一个不错的函数    #print(u'前一个月最后一天:' + str(pre_month))    #前一个月的第一天    first_day_of_pre_month = date(pre_month.year, pre_month.month, 1)    #print(first_day_of_pre_month)    time_start = str(first_day_of_pre_month) + ' 00:00:00'    time_end = str(pre_month) + ' 23:59:59'    #print(time_start)    #print(time_end)    start_year = today.year    end_week = int(today.strftime("%U")) + 1    print(end_week)    end_sql = ''' SELECT order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,check_time,status,                       (UNIX_TIMESTAMP(check_time)- UNIX_TIMESTAMP(apply_date)) AS 'use_time',channel_id                    FROM                        flow_order_info                    WHERE                            apply_date BETWEEN '{}'  AND '{}' AND enterprise_id = 54                      ORDER BY apply_date '''    temp_sql = ''' SELECT  order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,check_time,status,                    (UNIX_TIMESTAMP(check_time)- UNIX_TIMESTAMP(apply_date)) AS 'use_time',channel_id                    FROM                        flow_order_info_2020{}                    WHERE                            apply_date BETWEEN '{}'  AND '{}'  AND enterprise_id = 54  '''    sql = ''    for i in range(end_week-10,end_week+1):        if i == end_week:            sql += end_sql.format(time_start,time_end)        else :            sql += temp_sql.format(i,time_start,time_end) + 'UNION'    #print(sql)        get_data() 
 |