customer_count.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. # encoding=utf8
  2. import pymysql
  3. import pandas as pd
  4. import numpy as np
  5. import matplotlib as mpl
  6. import matplotlib.pyplot as plt
  7. from io import BytesIO
  8. from datetime import datetime, date
  9. from datetime import timedelta
  10. import xlsxwriter
  11. def get_data():
  12. mysql_host = '47.95.217.180'
  13. mysql_port = 3306
  14. mysql_user = 'rtech'
  15. mysql_password = 'S5RE4dQ65MphYk7F'
  16. mysql_db = 'fmp'
  17. db = pymysql.connect(host=mysql_host,port=mysql_port,user=mysql_user, password=mysql_password,db=mysql_db,charset = 'utf8')
  18. print("数据库连接成功")
  19. df = pd.read_sql(sql,con=db)
  20. db.close
  21. mpl.rcParams["font.sans-serif"] = ["SimHei"]
  22. mpl.rcParams["axes.unicode_minus"] = False
  23. df['enterprise_id'] = 'shihuibeijing'
  24. temp_actula_price = df['flow_amount']
  25. df['status'] = df['status'].str.replace('1','等待充值')
  26. df['status'] = df['status'].str.replace('2','充值中')
  27. df['status'] = df['status'].str.replace('4','充值失败')
  28. df['status'] = df['status'].str.replace('6','充值成功')
  29. temp_callback_status = df['status']
  30. df['channel_id'] = df['channel_id'].str.replace('104','亚杉移动网厅')
  31. df['channel_id'] = df['channel_id'].str.replace('117','亚杉电信网厅')
  32. df['channel_id'] = df['channel_id'].str.replace('92','人工充值移动')
  33. df['channel_id'] = df['channel_id'].str.replace('93','人工充值联通')
  34. df['channel_id'] = df['channel_id'].str.replace('94','人工充值电信')
  35. df['channel_id'] = df['channel_id'].str.replace('89','兆蓉移动')
  36. df['channel_id'] = df['channel_id'].str.replace('90','兆蓉联通')
  37. df['channel_id'] = df['channel_id'].str.replace('91','兆蓉电信')
  38. df['channel_id'] = df['channel_id'].str.replace('95','兆蓉移动网厅')
  39. df['channel_id'] = df['channel_id'].str.replace('116','兆蓉联通网厅')
  40. df['channel_id'] = df['channel_id'].str.replace('105','兆蓉电信网厅')
  41. df['channel_id'] = df['channel_id'].str.replace('83','满帆移动网厅')
  42. df['channel_id'] = df['channel_id'].str.replace('84','满帆联通网厅')
  43. df['channel_id'] = df['channel_id'].str.replace('85','满帆电信网厅')
  44. df['channel_id'] = df['channel_id'].str.replace('96','自动补单移动通道')
  45. df['channel_id'] = df['channel_id'].str.replace('97','自动补单联通通道')
  46. df['channel_id'] = df['channel_id'].str.replace('98','自动补单电信通道')
  47. df['channel_id'] = df['channel_id'].str.replace('113','自动补单移动通道2')
  48. df['channel_id'] = df['channel_id'].str.replace('114','自动补单联通通道2')
  49. df['channel_id'] = df['channel_id'].str.replace('115','自动补单电信通道2')
  50. df['channel_id'] = df['channel_id'].str.replace('118','智信全国移动')
  51. df['channel_id'] = df['channel_id'].str.replace('119','智信全国联通')
  52. df['channel_id'] = df['channel_id'].str.replace('120','智信全国电信')
  53. df['channel_id'] = df['channel_id'].str.replace('121','易迅捷移动网厅')
  54. df['channel_id'] = df['channel_id'].str.replace('122','易迅捷电信网厅')
  55. df['channel_id'] = df['channel_id'].str.replace('123','易迅捷联通网厅')
  56. df['channel_id'] = df['channel_id'].str.replace('124','亚杉联通网厅')
  57. province = [] #省份
  58. provider = [] #运营商
  59. for area in df['mobile_home']:
  60. province.append(area.split('-',2)[0])
  61. provider.append(area.split('-',2)[2])
  62. #print(area)
  63. '''
  64. col_name = df.columns.tolist()
  65. col_name.insert(col_name.index('used_mobile'),'actual_price')
  66. col_name.insert(col_name.index('mobile_home')+1,'area')
  67. col_name.insert(col_name.index('status')+1,'callback_status')
  68. df.reindex(columns=col_name)
  69. '''
  70. df = df.reindex(columns=['order_id','enterprise_id','flow_amount','actual_price','used_mobile','mobile_home','area',
  71. 'apply_date','check_time','status','use_time','callback_status','channel_id'])
  72. df['order_id'] = df['order_id'].apply(str)
  73. df['actual_price'] = temp_actula_price
  74. df['callback_status'] = temp_callback_status
  75. df['area'] = pd.Series(province)
  76. df['mobile_home'] = pd.Series(provider)
  77. #print(df['mobile_home'])
  78. provider_order = df['mobile_home'].value_counts() #运营商订单数
  79. provider_money = df.groupby('mobile_home').agg({'flow_amount':'sum'})#运行商面额
  80. periods = pd.PeriodIndex(df['apply_date'],freq='D')
  81. df['datetime'] = periods
  82. every_money = df.groupby('datetime').agg({'flow_amount':'sum'}) #每日充值面额
  83. #print(provider_money)
  84. #print(every_money['flow_amount'].values)
  85. pay_status = df['status'].value_counts() #充值状态占比
  86. pay_price = df.groupby('flow_amount').agg({'flow_amount':'sum'})#充值面额占比
  87. pay_area = df['area'].value_counts().sort_values(ascending=False)#充值地域占比
  88. #print(pay_price)
  89. pay_time = []
  90. for i in df['use_time'].values:
  91. if i/60 < 10:
  92. pay_time.append('0-10分钟')
  93. elif 10 <= i/60 < 20:
  94. pay_time.append('10-20分钟')
  95. elif 20 <= i/60 < 30:
  96. pay_time.append('20-30分钟')
  97. elif 30 <= i/60 < 40:
  98. pay_time.append('30-40分钟')
  99. elif 40 <= i/60 < 50:
  100. pay_time.append('40-50分钟')
  101. elif 50 <= i/60 <= 60:
  102. pay_time.append('50-60分钟')
  103. else :
  104. pay_time.append('60分钟以上')
  105. use_time_count = pd.value_counts(pay_time).sort_index()#充值时长占比
  106. #use_time_count = pd.value_counts(pay_time)#充值时长占比
  107. #print(use_time_count)
  108. df.drop(['datetime'],axis=1,inplace=True)
  109. df.columns = ['订单号','应用id','充值面额','实付金额','手机号','运营商','归属地','发送日期','回调日期',
  110. '订单状态','用时(秒)','回调状态','通道名称']
  111. #UPLOAD_FOLDER = './count_data/北京十荟每月数据统计.xlsx'
  112. #print(222)
  113. #UPLOAD_FOLDER = './北京十荟每月数据统计.csv'
  114. UPLOAD_FOLDER = './北京十荟{}月数据统计.xlsx'.format(pre_month.month)
  115. print(111)
  116. writer = pd.ExcelWriter(UPLOAD_FOLDER, engine='xlsxwriter')
  117. df.to_excel(writer,sheet_name=u'数据统计',index = False,encoding='utf_8_sig')
  118. workbook = writer.book
  119. sheet2 = workbook.add_worksheet(u'图表统计')
  120. #设置格式
  121. style1 = workbook.add_format({'fg_color':'#FFFFFF'}) #背景色
  122. style2 = workbook.add_format({'top':2,'fg_color':'#FFFFFF'}) #上边框
  123. style3 = workbook.add_format({'left':2,'fg_color':'#FFFFFF'}) #左边框
  124. style4 = workbook.add_format({'top':2,'left':2,'fg_color':'#FFFFFF'})
  125. style5 = workbook.add_format({'font_size':25,'bold':True,'align':'center','valign':'vcenter','fg_color':'#20B2AA'}) #
  126. for row in range(150):
  127. if(row<3):
  128. sheet2.set_row(row,25,style1)
  129. else:
  130. sheet2.set_row(row,None,style1)
  131. for col in range(1,22):
  132. sheet2.write(1,col,'',style2)
  133. sheet2.write(128,col,'',style2)
  134. for row in range(1,128):
  135. sheet2.write(row,1,'',style3)
  136. sheet2.write(row,22,'',style3)
  137. sheet2.write(1,1,'',style4)
  138. sheet2.merge_range('B2:Y3', '数据月报--十荟团', style5)
  139. #绘制本月GMV折线图
  140. fig = plt.figure(figsize=(10,5),dpi=200)
  141. x = range(1,pre_month.day+1)
  142. y = every_money['flow_amount'].values
  143. plt.plot(x,y)
  144. _xtick_labels = ['{}月{}日'.format(pre_month.month,i) for i in range(1,pre_month.day+1)]
  145. plt.xticks(x,_xtick_labels,rotation=45)
  146. plt.yticks(list(y)[::6],list(y)[::6])
  147. plt.xlabel('时间')
  148. plt.ylabel('金额 单位(元)')
  149. plt.grid(alpha=0.1)#绘制网格
  150. #plt.legend(loc='upper left') #图例
  151. plt.title('本月GMV')
  152. plt.savefig('every_money.png')
  153. #绘制运营商订单占比饼图
  154. fig = plt.figure(figsize=(2.5,2.5),dpi=180)
  155. labels = list(provider_order.index)
  156. #print(labels)
  157. #imagedata=BytesIO()
  158. colors = ['#1E90FF','#008000','#FFA500']
  159. plt.pie(provider_order,labels=labels,colors=colors,autopct='%1.0f%%', shadow=False, startangle=90)
  160. plt.xticks(provider_order)
  161. plt.axis('equal')
  162. #plt.legend(loc=2)
  163. plt.title('运营商订单比例')
  164. plt.savefig('provider_order.png')
  165. #绘制充值时长柱状图
  166. fig = plt.figure(figsize=(6,6),dpi=180)
  167. a = list(use_time_count.index)
  168. b = list(use_time_count.values)
  169. plt.bar(range(len(a)),b,width=0.6)
  170. plt.xticks(range(len(a)),a,rotation=45)
  171. for x, y in enumerate(b):
  172. plt.text(x, y+500, "%.0f" %y, ha='center', va= 'bottom')
  173. plt.xlabel('充值时长')
  174. plt.ylabel('订单数量')
  175. plt.grid(alpha=0.1)
  176. plt.title('充值时长占比')
  177. plt.savefig('use_time.png')
  178. #绘制运营商成交面额占比饼图
  179. fig = plt.figure(figsize=(2.5,2.5),dpi=180)
  180. labels_money = list(provider_money.index)
  181. #print(labels_money)
  182. colors = ['#1E90FF','#008000','#FFA500']
  183. plt.pie(provider_money['flow_amount'].values,labels=labels_money,colors=colors,autopct='%1.2f%%', shadow=False,startangle=90)
  184. plt.xticks(provider_money['flow_amount'].values)
  185. plt.axis('equal')
  186. #plt.legend()
  187. plt.title('运营商成交面额比例')
  188. plt.savefig('provider_money.png')
  189. #绘制充值状态占比饼图
  190. fig = plt.figure(figsize=(5,3.2),dpi=180)
  191. labels = list(pay_status.index)
  192. #explode = (0.3,0,0)
  193. #plt.pie(pay_status.values,explode =explode,labels=labels,autopct='%1.2f%%', shadow=False,startangle=0,pctdistance = 0.6,labeldistance=1.2)
  194. plt.pie(pay_status.values,labels=labels,autopct='%1.2f%%', shadow=False,startangle=0,pctdistance = 0.6,labeldistance=1.2)
  195. plt.xticks(pay_status.values)
  196. plt.axis('equal')
  197. plt.title('充值状态占比\n')
  198. plt.savefig('pay_status.png')
  199. #绘制充值面额占比饼图
  200. fig = plt.figure(figsize=(5,3.2),dpi=180)
  201. _labels = list(pay_price.index)
  202. labels = [int(_labels[i]) for i in range(len(_labels))]
  203. explode = (0.4,0,0,0,0.1,0.2)
  204. plt.pie(pay_price['flow_amount'].values,explode =explode,labels=labels,autopct='%1.2f%%', shadow=False,startangle=90,pctdistance = 0.6,labeldistance=1.1)
  205. plt.xticks(pay_price.values)
  206. plt.axis('equal')
  207. plt.title('充值面额占比\n')
  208. plt.savefig('pay_price.png')
  209. #绘制充值地域柱状图
  210. fig = plt.figure(figsize=(12,8),dpi=180)
  211. a = list(pay_area.index)
  212. b = list(pay_area.values)
  213. plt.barh(range(len(a)),b,height=0.6,color='orange')
  214. #plt.xticks(b[::2],b[::2])
  215. plt.yticks(range(len(a)),a)
  216. for y, x in enumerate(b):
  217. plt.text(x+1100, y-0.3, "%.0f" %x,ha='center', va= 'bottom')
  218. plt.xlabel('订单数量')
  219. plt.ylabel('省份')
  220. plt.grid(alpha=0.1)
  221. plt.title('地域分布')
  222. plt.savefig('area.png')
  223. sheet2.insert_image(5,3,'every_money.png',{'x_offset': 5, 'y_offset': 5})
  224. sheet2.insert_image(45,3,'provider_order.png',{'x_offset': 5, 'y_offset': 5})
  225. sheet2.insert_image(35,8,'use_time.png',{'x_offset': 5, 'y_offset': 5})
  226. sheet2.insert_image(45,17,'provider_money.png',{'x_offset': 5, 'y_offset': 5})
  227. sheet2.insert_image(69,3,'pay_status.png',{'x_offset': 10, 'y_offset': 5})
  228. sheet2.insert_image(69,13,'pay_price.png',{'x_offset': 5, 'y_offset': 5})
  229. sheet2.insert_image(88,3,'area.png',{'x_offset': 0, 'y_offset': 3})
  230. print(33)
  231. writer.save()
  232. writer.close()
  233. #df.to_csv(UPLOAD_FOLDER,index=False,encoding='utf-8-sig')
  234. if __name__ == '__main__':
  235. today = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)
  236. delta = timedelta(days=1)
  237. #求该月第一天
  238. first_day = date(today.year, today.month, 1)
  239. #print(u'该月第一天:' + str(first_day))
  240. #前一个月最后一天
  241. pre_month = first_day - delta #timedelta是一个不错的函数
  242. #print(u'前一个月最后一天:' + str(pre_month))
  243. #前一个月的第一天
  244. first_day_of_pre_month = date(pre_month.year, pre_month.month, 1)
  245. #print(first_day_of_pre_month)
  246. time_start = str(first_day_of_pre_month) + ' 00:00:00'
  247. time_end = str(pre_month) + ' 23:59:59'
  248. #print(time_start)
  249. #print(time_end)
  250. start_year = today.year
  251. end_week = int(today.strftime("%U")) + 1
  252. print(end_week)
  253. end_sql = ''' SELECT order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,check_time,status,
  254. (UNIX_TIMESTAMP(check_time)- UNIX_TIMESTAMP(apply_date)) AS 'use_time',channel_id
  255. FROM
  256. flow_order_info
  257. WHERE
  258. apply_date BETWEEN '{}' AND '{}' AND enterprise_id = 54
  259. ORDER BY apply_date '''
  260. temp_sql = ''' SELECT order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,check_time,status,
  261. (UNIX_TIMESTAMP(check_time)- UNIX_TIMESTAMP(apply_date)) AS 'use_time',channel_id
  262. FROM
  263. flow_order_info_2020{}
  264. WHERE
  265. apply_date BETWEEN '{}' AND '{}' AND enterprise_id = 54 '''
  266. sql = ''
  267. for i in range(end_week-10,end_week+1):
  268. if i == end_week:
  269. sql += end_sql.format(time_start,time_end)
  270. else :
  271. sql += temp_sql.format(i,time_start,time_end) + 'UNION'
  272. #print(sql)
  273. get_data()