customer_count.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. # encoding=utf8
  2. import pymysql
  3. import pandas as pd
  4. import numpy as np
  5. import matplotlib as mpl
  6. import matplotlib.pyplot as plt
  7. from io import BytesIO
  8. from datetime import datetime, date
  9. from datetime import timedelta
  10. import xlsxwriter
  11. DATE = None
  12. STEP = None
  13. today = datetime.today()
  14. year = today.year
  15. month = today.month
  16. if month == 1:
  17. month = 12
  18. year -= 1
  19. else:
  20. month -= 1
  21. date1 = datetime(year, month, 1)
  22. date2 = datetime(today.year, today.month, 1)
  23. date1_week = date1.isocalendar()[1]
  24. date2_week = date2.isocalendar()[1]
  25. if DATE is None:
  26. DATE = date1
  27. if STEP is None:
  28. STEP = (date2 - date1).days
  29. def get_data():
  30. mysql_host = '47.95.217.180'
  31. mysql_port = 3306
  32. mysql_user = 'rtech'
  33. mysql_password = 'S5RE4dQ65MphYk7F'
  34. mysql_db = 'fmp'
  35. db = pymysql.connect(host=mysql_host,port=mysql_port,user=mysql_user, password=mysql_password,db=mysql_db,charset = 'utf8')
  36. print("数据库连接成功")
  37. df = pd.read_sql(sql,con=db)
  38. db.close
  39. mpl.rcParams["font.sans-serif"] = ["SimHei"]
  40. mpl.rcParams["axes.unicode_minus"] = False
  41. df['enterprise_id'] = 'shihuibeijing'
  42. temp_actula_price = df['flow_amount']
  43. total_flow_amount = df['flow_amount'].sum(axis = 0)
  44. print(total_flow_amount)
  45. df['status'] = df['status'].str.replace('1','等待充值')
  46. df['status'] = df['status'].str.replace('2','充值中')
  47. df['status'] = df['status'].str.replace('4','充值失败')
  48. df['status'] = df['status'].str.replace('6','充值成功')
  49. temp_callback_status = df['status']
  50. '''
  51. df['channel_id'] = df['channel_id'].str.replace('104','亚杉移动网厅')
  52. df['channel_id'] = df['channel_id'].str.replace('117','亚杉电信网厅')
  53. df['channel_id'] = df['channel_id'].str.replace('92','人工充值移动')
  54. df['channel_id'] = df['channel_id'].str.replace('93','人工充值联通')
  55. df['channel_id'] = df['channel_id'].str.replace('94','人工充值电信')
  56. df['channel_id'] = df['channel_id'].str.replace('89','兆蓉移动')
  57. df['channel_id'] = df['channel_id'].str.replace('90','兆蓉联通')
  58. df['channel_id'] = df['channel_id'].str.replace('91','兆蓉电信')
  59. df['channel_id'] = df['channel_id'].str.replace('95','兆蓉移动网厅')
  60. df['channel_id'] = df['channel_id'].str.replace('116','兆蓉联通网厅')
  61. df['channel_id'] = df['channel_id'].str.replace('105','兆蓉电信网厅')
  62. df['channel_id'] = df['channel_id'].str.replace('83','满帆移动网厅')
  63. df['channel_id'] = df['channel_id'].str.replace('84','满帆联通网厅')
  64. df['channel_id'] = df['channel_id'].str.replace('85','满帆电信网厅')
  65. df['channel_id'] = df['channel_id'].str.replace('96','自动补单移动通道')
  66. df['channel_id'] = df['channel_id'].str.replace('97','自动补单联通通道')
  67. df['channel_id'] = df['channel_id'].str.replace('98','自动补单电信通道')
  68. df['channel_id'] = df['channel_id'].str.replace('113','自动补单移动通道2')
  69. df['channel_id'] = df['channel_id'].str.replace('114','自动补单联通通道2')
  70. df['channel_id'] = df['channel_id'].str.replace('115','自动补单电信通道2')
  71. df['channel_id'] = df['channel_id'].str.replace('118','智信全国移动')
  72. df['channel_id'] = df['channel_id'].str.replace('119','智信全国联通')
  73. df['channel_id'] = df['channel_id'].str.replace('120','智信全国电信')
  74. df['channel_id'] = df['channel_id'].str.replace('121','易迅捷移动网厅')
  75. df['channel_id'] = df['channel_id'].str.replace('122','易迅捷电信网厅')
  76. df['channel_id'] = df['channel_id'].str.replace('123','易迅捷联通网厅')
  77. df['channel_id'] = df['channel_id'].str.replace('124','亚杉联通网厅')
  78. '''
  79. province = [] #省份
  80. provider = [] #运营商
  81. for area in df['mobile_home']:
  82. province.append(area.split('-',2)[0])
  83. provider.append(area.split('-',2)[2])
  84. #print(area)
  85. '''
  86. col_name = df.columns.tolist()
  87. col_name.insert(col_name.index('used_mobile'),'actual_price')
  88. col_name.insert(col_name.index('mobile_home')+1,'area')
  89. col_name.insert(col_name.index('status')+1,'callback_status')
  90. df.reindex(columns=col_name)
  91. '''
  92. df = df.reindex(columns=['order_id','enterprise_id','flow_amount','actual_price','used_mobile','mobile_home','area',
  93. 'apply_date','check_time','status','use_time','callback_status','channel_name'])
  94. df['order_id'] = df['order_id'].apply(str)
  95. df['actual_price'] = temp_actula_price
  96. df['callback_status'] = temp_callback_status
  97. df['area'] = pd.Series(province)
  98. df['mobile_home'] = pd.Series(provider)
  99. #print(df['mobile_home'])
  100. provider_order = df['mobile_home'].value_counts() #运营商订单数
  101. provider_money = df.groupby('mobile_home').agg({'flow_amount':'sum'})#运行商面额
  102. periods = pd.PeriodIndex(df['apply_date'],freq='D')
  103. df['datetime'] = periods
  104. every_money = df.groupby('datetime').agg({'flow_amount':'sum'}) #每日充值面额
  105. #print(provider_money)
  106. #print(every_money['flow_amount'].values)
  107. pay_status = df['status'].value_counts() #充值状态占比
  108. pay_price = df.groupby('flow_amount').agg({'flow_amount':'sum'})#充值面额占比
  109. pay_area = df['area'].value_counts().sort_values(ascending=False)#充值地域占比
  110. #print(pay_price)
  111. pay_time = []
  112. '''
  113. for i in df['use_time'].values:
  114. if i/60 < 10:
  115. pay_time.append('0-10分钟')
  116. elif 10 <= i/60 < 20:
  117. pay_time.append('10-20分钟')
  118. elif 20 <= i/60 < 30:
  119. pay_time.append('20-30分钟')
  120. elif 30 <= i/60 < 40:
  121. pay_time.append('30-40分钟')
  122. elif 40 <= i/60 < 50:
  123. pay_time.append('40-50分钟')
  124. elif 50 <= i/60 <= 60:
  125. pay_time.append('50-60分钟')
  126. else :
  127. pay_time.append('60分钟以上')
  128. #pass
  129. '''
  130. for i in df['use_time'].values:
  131. if i/60 < 20:
  132. pay_time.append('0-10分钟')
  133. elif 20 <= i/60 < 30:
  134. pay_time.append('10-20分钟')
  135. elif 30 <= i/60 < 40:
  136. pay_time.append('20-30分钟')
  137. elif 40 <= i/60 < 50:
  138. pay_time.append('30-40分钟')
  139. elif 50 <= i/60 < 60:
  140. pay_time.append('40分钟以上')
  141. else :
  142. #pay_time.append('60分钟以上')
  143. pass
  144. use_time_count = pd.value_counts(pay_time).sort_index()#充值时长占比
  145. #use_time_count = pd.value_counts(pay_time)#充值时长占比
  146. use_df = pd.DataFrame(use_time_count)
  147. #print(use_time_count)
  148. #print(type(use_time_count))
  149. #print(use_df)
  150. use_df = use_df.div(use_df.sum(axis=0), axis=1)
  151. #print(a[0].values[0])
  152. df.drop(['datetime'],axis=1,inplace=True)
  153. df.columns = ['订单号','应用id','充值面额','实付金额','手机号','运营商','归属地','发送日期','回调日期',
  154. '订单状态','用时(秒)','回调状态','通道名称']
  155. #UPLOAD_FOLDER = './count_data/北京十荟每月数据统计.xlsx'
  156. #print(222)
  157. #UPLOAD_FOLDER = './北京十荟每月数据统计.csv'
  158. UPLOAD_FOLDER = './北京十荟{}月数据统计.xlsx'.format(pre_month.month)
  159. print(111)
  160. writer = pd.ExcelWriter(UPLOAD_FOLDER, engine='xlsxwriter')
  161. #df.to_excel(writer,sheet_name=u'数据统计',index = False,encoding='utf_8_sig')
  162. workbook = writer.book
  163. sheet2 = workbook.add_worksheet(u'图表统计')
  164. #设置格式
  165. style1 = workbook.add_format({'fg_color':'#FFFFFF'}) #背景色
  166. style2 = workbook.add_format({'top':2,'fg_color':'#FFFFFF'}) #上边框
  167. style3 = workbook.add_format({'left':2,'fg_color':'#FFFFFF'}) #左边框
  168. style4 = workbook.add_format({'top':2,'left':2,'fg_color':'#FFFFFF'})
  169. style5 = workbook.add_format({'font_size':25,'bold':True,'align':'center','valign':'vcenter','fg_color':'#20B2AA'}) #
  170. for row in range(150):
  171. if(row<3):
  172. sheet2.set_row(row,25,style1)
  173. else:
  174. sheet2.set_row(row,None,style1)
  175. for col in range(1,22):
  176. sheet2.write(1,col,'',style2)
  177. sheet2.write(128,col,'',style2)
  178. for row in range(1,128):
  179. sheet2.write(row,1,'',style3)
  180. sheet2.write(row,22,'',style3)
  181. sheet2.write(1,1,'',style4)
  182. sheet2.merge_range('B2:V3', '数据月报--十荟团', style5)
  183. #绘制本月GMV折线图
  184. fig = plt.figure(figsize=(10,5),dpi=200)
  185. print(pre_month.day)
  186. #sleep(5)
  187. x = range(1,pre_month.day+1)
  188. y = every_money['flow_amount'].values
  189. plt.plot(x,y)
  190. _xtick_labels = ['{}月{}日'.format(pre_month.month,i) for i in range(1,pre_month.day+1)]
  191. plt.xticks(x,_xtick_labels,rotation=45)
  192. plt.yticks(list(y)[::6],list(y)[::6])
  193. plt.xlabel('时间')
  194. plt.ylabel('金额 单位(元)')
  195. plt.grid(alpha=0.1)#绘制网格
  196. #plt.legend(loc='upper left') #图例
  197. plt.title('本月GMV总数为 {} 元'.format(total_flow_amount))
  198. plt.savefig('every_money.png')
  199. #绘制运营商订单占比饼图
  200. fig = plt.figure(figsize=(2.5,2.5),dpi=180)
  201. labels = list(provider_order.index)
  202. #print(labels)
  203. #imagedata=BytesIO()
  204. colors = ['#1E90FF','#008000','#FFA500']
  205. plt.pie(provider_order,labels=labels,colors=colors,autopct='%1.0f%%', shadow=False, startangle=90)
  206. plt.xticks(provider_order)
  207. plt.axis('equal')
  208. #plt.legend(loc=2)
  209. plt.title('运营商订单比例')
  210. plt.savefig('provider_order.png')
  211. #绘制充值时长柱状图
  212. fig = plt.figure(figsize=(6,6),dpi=180)
  213. a = list(use_time_count.index)
  214. _b = list(use_df[0].values)
  215. #print(_b)
  216. b = list(use_time_count.values)
  217. plt.bar(range(len(a)),b,width=0.6)
  218. plt.xticks(range(len(a)),a,rotation=45)
  219. #count=0
  220. for x,y in enumerate(_b):
  221. #print(x)
  222. #print(y)
  223. plt.text(x, b[x]+200, '{:.2f}'.format(y * 100) + '%', ha='center', va= 'bottom')
  224. #count += 1
  225. plt.xlabel('充值时长')
  226. #plt.ylabel('订单数量')
  227. plt.grid(alpha=0.1)
  228. plt.title('充值时长占比')
  229. plt.savefig('use_time.png')
  230. #绘制运营商成交面额占比饼图
  231. fig = plt.figure(figsize=(2.5,2.5),dpi=180)
  232. labels_money = list(provider_money.index)
  233. #print(labels_money)
  234. colors = ['#1E90FF','#008000','#FFA500']
  235. plt.pie(provider_money['flow_amount'].values,labels=labels_money,colors=colors,autopct='%1.2f%%', shadow=False,startangle=90)
  236. plt.xticks(provider_money['flow_amount'].values)
  237. plt.axis('equal')
  238. #plt.legend()
  239. plt.title('运营商成交面额比例')
  240. plt.savefig('provider_money.png')
  241. #绘制充值状态占比饼图
  242. fig = plt.figure(figsize=(5,3.2),dpi=180)
  243. labels = list(pay_status.index)
  244. #explode = (0.3,0,0)
  245. #plt.pie(pay_status.values,explode =explode,labels=labels,autopct='%1.2f%%', shadow=False,startangle=0,pctdistance = 0.6,labeldistance=1.2)
  246. plt.pie(pay_status.values,labels=labels,autopct='%1.2f%%', shadow=False,startangle=0,pctdistance = 0.6,labeldistance=1.2)
  247. plt.xticks(pay_status.values)
  248. plt.axis('equal')
  249. plt.title('充值状态占比\n')
  250. plt.savefig('pay_status.png')
  251. #绘制充值面额占比饼图
  252. fig = plt.figure(figsize=(5,3.2),dpi=180)
  253. _labels = list(pay_price.index)
  254. labels = [int(_labels[i]) for i in range(len(_labels))]
  255. explode = (0.2,0,0,0,0)
  256. #explode = (0.1,0,0,0,0.2,0.3)
  257. plt.pie(pay_price['flow_amount'].values,explode =explode,labels=labels,autopct='%1.2f%%', shadow=False,startangle=90,pctdistance = 0.6,labeldistance=1.1)
  258. #plt.pie(pay_price['flow_amount'].values,labels=labels,autopct='%1.2f%%', shadow=False,startangle=90,pctdistance = 0.6,labeldistance=1.1)
  259. plt.xticks(pay_price.values)
  260. plt.axis('equal')
  261. plt.title('充值面额占比\n')
  262. plt.savefig('pay_price.png')
  263. #绘制充值地域柱状图
  264. fig = plt.figure(figsize=(12,8),dpi=180)
  265. a = list(pay_area.index)
  266. b = list(pay_area.values)
  267. plt.barh(range(len(a)),b,height=0.6,color='orange')
  268. #plt.xticks(b[::2],b[::2])
  269. plt.yticks(range(len(a)),a)
  270. for y, x in enumerate(b):
  271. plt.text(x+1500, y-0.3, "%.0f" %x,ha='center', va= 'bottom')
  272. plt.xlabel('订单数量')
  273. plt.ylabel('省份')
  274. plt.grid(alpha=0.1)
  275. plt.title('地域分布')
  276. plt.savefig('area.png')
  277. sheet2.insert_image(5,3,'every_money.png',{'x_offset': 5, 'y_offset': 5})
  278. sheet2.insert_image(45,3,'provider_order.png',{'x_offset': 5, 'y_offset': 5})
  279. sheet2.insert_image(35,8,'use_time.png',{'x_offset': 5, 'y_offset': 5})
  280. sheet2.insert_image(45,17,'provider_money.png',{'x_offset': 5, 'y_offset': 5})
  281. sheet2.insert_image(69,3,'pay_status.png',{'x_offset': 10, 'y_offset': 5})
  282. sheet2.insert_image(69,13,'pay_price.png',{'x_offset': 5, 'y_offset': 5})
  283. sheet2.insert_image(88,3,'area.png',{'x_offset': 0, 'y_offset': 3})
  284. print(33)
  285. writer.save()
  286. writer.close()
  287. #df.to_csv(UPLOAD_FOLDER,index=False,encoding='utf-8-sig')
  288. if __name__ == '__main__':
  289. #today = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)
  290. #delta = timedelta(days=1)
  291. #求该月第一天
  292. #first_day = date(today.year, today.month, 1)
  293. #print(u'该月第一天:' + str(first_day))
  294. #前一个月最后一天
  295. pre_month = date2 - timedelta(days=1)
  296. #print(u'前一个月最后一天:' + str(pre_month))
  297. #前一个月的第一天
  298. #first_day_of_pre_month = date(pre_month.year, pre_month.month, 1)
  299. #print(first_day_of_pre_month)
  300. time_start = str(date1)
  301. time_end = str(date2)
  302. #print(time_start)
  303. #print(time_end)
  304. start_year = today.year
  305. end_week = int(today.strftime("%U")) + 1
  306. #print('end_week is {}'.format(end_week))
  307. #print('date2_week is: {}'.format(date2_week))
  308. end_sql = '''SELECT order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,
  309. check_time,status,use_time,channel_name
  310. FROM
  311. (SELECT order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,check_time,status,
  312. (UNIX_TIMESTAMP(check_time)- UNIX_TIMESTAMP(apply_date)) AS 'use_time',channel_id
  313. FROM
  314. flow_order_info
  315. WHERE
  316. apply_date BETWEEN '{}' AND '{}' AND enterprise_id = 54 ) t1 LEFT JOIN access_channel_info
  317. ON t1.channel_id = access_channel_info.channel_seq_id
  318. ORDER BY apply_date '''
  319. temp_sql = ''' SELECT order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,
  320. check_time,status,use_time,channel_name
  321. FROM
  322. (SELECT order_id,enterprise_id,flow_amount,used_mobile,mobile_home,apply_date,check_time,status,
  323. (UNIX_TIMESTAMP(check_time)- UNIX_TIMESTAMP(apply_date)) AS 'use_time',channel_id
  324. FROM
  325. flow_order_info_{}
  326. WHERE
  327. apply_date BETWEEN '{}' AND '{}' AND enterprise_id = 54) t1 LEFT JOIN access_channel_info
  328. ON t1.channel_id = access_channel_info.channel_seq_id '''
  329. sql = ''
  330. for i in range(date1_week,date2_week+1):
  331. str_table_suffix = "{0}{1:02d}".format(year,i)
  332. if i == date2_week:
  333. if (today - date2) < timedelta(days=7):
  334. sql += end_sql.format(time_start,time_end)
  335. else:
  336. sql += temp_sql.format(str_table_suffix,time_start,time_end)
  337. else :
  338. sql += temp_sql.format(str_table_suffix,time_start,time_end) + 'UNION'
  339. #print(sql)
  340. get_data()