Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*Given a list of dictionaries this function will insert each dictionary as a row into the given database table* 

5 

6:Author: 

7 David Young 

8 

9:Date Created: 

10 June 21, 2016 

11""" 

12from __future__ import print_function 

13from __future__ import division 

14################# GLOBAL IMPORTS #################### 

15from builtins import str 

16from builtins import range 

17from past.utils import old_div 

18import sys 

19import os 

20os.environ['TERM'] = 'vt100' 

21from fundamentals import tools 

22from fundamentals.mysql import convert_dictionary_to_mysql_table, writequery 

23from fundamentals.fmultiprocess import fmultiprocess 

24import time 

25import re 

26from fundamentals.mysql.database import database 

27import pandas as pd 

28from datetime import datetime 

29import numpy as np 

30 

31 

# MODULE-LEVEL STATE SHARED BETWEEN THE PUBLIC ENTRY POINT AND THE WORKER
# FUNCTIONS DISPATCHED THROUGH ``fmultiprocess``

# not assigned by any function in this module (the workers only bind a
# local of the same name)
count = 0

# set by ``insert_list_of_dictionaries_into_database_tables`` to the number
# of rows being inserted
totalCount = 0

# set by ``insert_list_of_dictionaries_into_database_tables`` to either the
# database settings (when given) or the open connection
globalDbConn = False

# list of ``(batch, end)`` tuples; the workers receive an index into this list
sharedList = []

36 

37 

def insert_list_of_dictionaries_into_database_tables(
        dbConn,
        log,
        dictList,
        dbTableName,
        uniqueKeyList=None,
        dateModified=False,
        dateCreated=True,
        batchSize=2500,
        replace=False,
        dbSettings=False):
    """insert list of dictionaries into database tables

    The first dictionary is inserted on its own via
    ``convert_dictionary_to_mysql_table``; the remaining dictionaries are
    split into batches of *batchSize* rows and handed to ``fmultiprocess``.

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dictList`` -- list of python dictionaries to add to the database table
        - ``dbTableName`` -- name of the database table
        - ``uniqueKeyList`` -- a list of column names to append as a unique constraint on the database. Default *None* (treated as an empty list)
        - ``dateModified`` -- add the modification date as a column in the database
        - ``dateCreated`` -- add the created date as a column in the database
        - ``batchSize`` -- batch the insert commands into *batchSize* batches
        - ``replace`` -- replace row if a duplicate is found
        - ``dbSettings`` -- pass in the database settings so multiprocessing can establish one connection per process (might not be faster). When given, the batches are loaded with ``_add_dictlist_to_database_via_load_in_file`` instead of ``_insert_single_batch_into_database``

    **Return:**
        - None

    **Usage:**

        .. code-block:: python

            from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
            insert_list_of_dictionaries_into_database_tables(
                dbConn=dbConn,
                log=log,
                dictList=dictList,
                dbTableName="test_insert_many",
                uniqueKeyList=["col1", "col3"],
                dateModified=False,
                batchSize=2500
            )
    """
    log.debug(
        'starting the ````insert_list_of_dictionaries_into_database_tables`` function')

    global totalCount
    global globalDbConn
    global sharedList

    # A FRESH LIST PER CALL - AVOIDS SHARING ONE MUTABLE DEFAULT BETWEEN CALLS
    if uniqueKeyList is None:
        uniqueKeyList = []

    reDate = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    # THE WORKERS READ THE CONNECTION (OR THE SETTINGS) FROM THIS GLOBAL
    globalDbConn = dbSettings if dbSettings else dbConn

    if len(dictList) == 0:
        log.warning(
            'the dictionary to be added to the database is empty')
        return None

    # INSERT THE FIRST ROW ON ITS OWN BEFORE THE REMAINING ROWS ARE BATCHED.
    # TO DEBUG A FAILING QUERY, RUN THIS SAME CALL OVER EVERY DICTIONARY IN
    # ``dictList`` ONE AT A TIME INSTEAD OF BATCHING.
    convert_dictionary_to_mysql_table(
        dbConn=dbConn,
        log=log,
        dictionary=dictList[0],
        dbTableName=dbTableName,
        uniqueKeyList=uniqueKeyList,
        dateModified=dateModified,
        reDatetime=reDate,
        replace=replace,
        dateCreated=dateCreated)
    remaining = dictList[1:]

    dbConn.autocommit(False)

    if len(remaining):

        total = len(remaining)

        # STEP THROUGH THE ROWS IN STRIDES OF ``batchSize``. ITERATING OVER
        # THE START OFFSETS NEVER YIELDS AN EMPTY TRAILING BATCH WHEN
        # ``total`` IS AN EXACT MULTIPLE OF ``batchSize``
        sharedList = [
            (remaining[start:start + batchSize], start + batchSize)
            for start in range(0, total, batchSize)
        ]

        # +1 FOR THE ROW ALREADY INSERTED ABOVE
        totalCount = total + 1
        batchIndexes = list(range(len(sharedList)))

        print("Starting to insert %s rows into %s" %
              (totalCount, dbTableName))

        # TRUTHINESS TEST TO MATCH THE ``globalDbConn`` SELECTION ABOVE
        if not dbSettings:
            fmultiprocess(
                log=log,
                function=_insert_single_batch_into_database,
                inputArray=batchIndexes,
                dbTableName=dbTableName,
                uniqueKeyList=uniqueKeyList,
                dateModified=dateModified,
                replace=replace,
                batchSize=batchSize,
                reDatetime=reDate,
                dateCreated=dateCreated
            )
        else:
            fmultiprocess(
                log=log,
                function=_add_dictlist_to_database_via_load_in_file,
                inputArray=batchIndexes,
                dbTablename=dbTableName,
                dbSettings=dbSettings,
                dateModified=dateModified
            )

        # CURSOR UP ONE LINE AND CLEAR LINE, THEN REPORT COMPLETION
        sys.stdout.write("\x1b[1A\x1b[2K")
        print("%s / %s rows inserted into %s" %
              (totalCount, totalCount, dbTableName))

    log.debug(
        'completed the ``insert_list_of_dictionaries_into_database_tables`` function')
    return None

183 

184 

def _insert_single_batch_into_database(
        batchIndex,
        log,
        dbTableName,
        uniqueKeyList,
        dateModified,
        replace,
        batchSize,
        reDatetime,
        dateCreated):
    """*insert one batch of dictionaries from the module-level ``sharedList`` into a database table*

    The batch is written with a single multi-row ``INSERT IGNORE``. If that
    write raises, the rows are re-packed with
    ``convert_dictionary_to_mysql_table`` and written again. If the write
    reports ``"unknown column"``, every row is passed to
    ``convert_dictionary_to_mysql_table`` and the insert is retried.

    **Key Arguments:**
        - ``batchIndex`` -- the index of the batch (within ``sharedList``) to insert
        - ``log`` -- logger
        - ``dbTableName`` -- name of the database table
        - ``uniqueKeyList`` -- list of column names passed on to ``convert_dictionary_to_mysql_table``
        - ``dateModified`` -- passed on to ``convert_dictionary_to_mysql_table``
        - ``replace`` -- append an ``ON DUPLICATE KEY UPDATE`` clause so duplicate rows are updated
        - ``batchSize`` -- not used by this function; accepted because the dispatcher passes it
        - ``reDatetime`` -- compiled date regex passed on to ``convert_dictionary_to_mysql_table``
        - ``dateCreated`` -- add ``dateCreated = NOW()`` to each inserted row

    **Return:**
        - the string ``"None"``
    """
    log.debug('starting the ``_insert_single_batch_into_database`` function')

    global globalDbConn
    global sharedList

    # EACH ENTRY OF sharedList IS A (rows, end) TUPLE - ONLY THE ROWS ARE NEEDED
    rows = sharedList[batchIndex][0]

    if not rows:
        # NOTHING TO INSERT - RETURN BEFORE TOUCHING THE DATABASE
        log.debug(
            'completed the ``_insert_single_batch_into_database`` function')
        return "None"

    # A SETTINGS DICTIONARY MEANS THIS FUNCTION MUST OPEN (AND LATER CLOSE)
    # ITS OWN CONNECTION; OTHERWISE THE SHARED CONNECTION IS REUSED
    ownsConnection = isinstance(globalDbConn, dict)
    if ownsConnection:
        dbConn = database(
            log=log,
            dbSettings=globalDbConn,
            autocommit=False
        ).connect()
    else:
        dbConn = globalDbConn

    try:
        # UNION OF THE KEYS FOUND IN ANY ROW. THE ORIGINAL KEY IS KEPT FOR
        # THE VALUE LOOKUP; THE SANITISED FORM IS USED AS THE COLUMN NAME
        originalKeys = list(set().union(*(d.keys() for d in rows)))
        columnNames = [k.replace(" ", "_").replace("-", "_")
                       for k in originalKeys]

        # A KEY MISSING FROM A ROW IS INSERTED AS NULL
        vals = [
            tuple(None if d.get(k) in ["None", None] else d[k]
                  for k in originalKeys)
            for d in rows
        ]

        columnSql = '`,`'.join(columnNames)
        placeholders = ", ".join(["%s"] * len(columnNames))
        if dateCreated:
            bulkCommand = "INSERT IGNORE INTO `" + dbTableName + \
                "` (`" + columnSql + "`, dateCreated) VALUES (" + \
                placeholders + ", NOW())"
        else:
            bulkCommand = "INSERT IGNORE INTO `" + dbTableName + \
                "` (`" + columnSql + "`) VALUES (" + placeholders + ")"

        if replace:
            # BACK-TICKED TO MATCH THE QUOTING OF THE INSERT COLUMN LIST
            updates = ", ".join(
                "`%s`=values(`%s`)" % (c, c) for c in columnNames)
            bulkCommand += " ON DUPLICATE KEY UPDATE " + updates + \
                ", updated=1, dateLastModified=NOW()"

        # LEGACY CLEAN-UP OF THE COMMAND TEXT, RETAINED AS-IS. THE VALUES ARE
        # PASSED SEPARATELY, SO THIS ONLY TOUCHES TABLE AND COLUMN NAMES
        bulkCommand = bulkCommand.replace('\\""', '\\" "')
        bulkCommand = bulkCommand.replace('""', "null")
        bulkCommand = bulkCommand.replace('"None"', 'null')

        # NOTE(review): THERE IS NO RETRY CAP - IF THE RECOVERY STEP BELOW
        # DOES NOT CLEAR THE "unknown column" RESPONSE THIS LOOP WILL NOT END
        while True:
            try:
                message = writequery(
                    log=log,
                    sqlQuery=bulkCommand,
                    dbConn=dbConn,
                    Force=True,
                    manyValueList=vals
                )
            except Exception:
                # FALL BACK TO THE INSERT COMMAND AND VALUE TUPLES GENERATED
                # ROW-BY-ROW; THE COMMAND FROM THE LAST ROW IS USED FOR ALL
                theseInserts = []
                for aDict in rows:
                    fallbackCommand, valueTuple = convert_dictionary_to_mysql_table(
                        dbConn=dbConn,
                        log=log,
                        dictionary=aDict,
                        dbTableName=dbTableName,
                        uniqueKeyList=uniqueKeyList,
                        dateModified=dateModified,
                        returnInsertOnly=True,
                        replace=replace,
                        reDatetime=reDatetime,
                        skipChecks=True,
                        dateCreated=dateCreated
                    )
                    theseInserts.append(valueTuple)

                message = writequery(
                    log=log,
                    sqlQuery=fallbackCommand,
                    dbConn=dbConn,
                    Force=True,
                    manyValueList=theseInserts
                )

            if message != "unknown column":
                break

            # PASS EVERY ROW (NOT THE (rows, end) TUPLE) THROUGH
            # convert_dictionary_to_mysql_table, THEN RETRY THE INSERT
            for aDict in rows:
                convert_dictionary_to_mysql_table(
                    dbConn=dbConn,
                    log=log,
                    dictionary=aDict,
                    dbTableName=dbTableName,
                    uniqueKeyList=uniqueKeyList,
                    dateModified=dateModified,
                    reDatetime=reDatetime,
                    replace=replace,
                    dateCreated=dateCreated
                )

        dbConn.commit()
    finally:
        # ONLY CLOSE A CONNECTION THIS FUNCTION OPENED ITSELF
        if ownsConnection:
            dbConn.close()

    log.debug('completed the ``_insert_single_batch_into_database`` function')
    return "None"

338 

339 

def _add_dictlist_to_database_via_load_in_file(
        masterListIndex,
        dbTablename,
        dbSettings,
        dateModified=False):
    """*load a list of dictionaries into a database table with load data infile*

    The batch is written to a pipe-delimited CSV under ``/tmp``, loaded into
    a temporary table cloned from ``dbTablename`` and then merged into
    ``dbTablename`` with ``INSERT IGNORE ... ON DUPLICATE KEY UPDATE``. The
    CSV file is removed and the connection closed even if a query raises.

    **Key Arguments:**

        - ``masterListIndex`` -- the index of the sharedList of dictionary lists to process
        - ``dbTablename`` -- the name of the database table to add the list to
        - ``dbSettings`` -- the dictionary of database settings
        - ``dateModified`` -- add a dateModified stamp with an updated flag to rows?

    **Return:**
        - None
    """
    global sharedList

    dictList = sharedList[masterListIndex][0]
    if not dictList:
        # NOTHING TO LOAD - AN EMPTY BATCH WOULD GENERATE AN EMPTY COLUMN
        # LIST (INVALID SQL), SO RETURN BEFORE OPENING A CONNECTION
        return None

    from fundamentals.logs import emptyLogger
    log = emptyLogger()
    log.debug('starting the ``_add_dictlist_to_database_via_load_in_file`` function')

    # SETUP ALL DATABASE CONNECTIONS
    dbConn = database(
        log=log,
        dbSettings=dbSettings
    ).connect()

    # THE PROCESS ID KEEPS THE CSV FILE NAMES OF PARALLEL WORKERS APART
    tmpTable = "%s_%d" % (
        datetime.now().strftime("tmp_%Y%m%dt%H%M%S%f"), os.getpid())
    csvPath = '/tmp/%(tmpTable)s' % locals()

    try:
        # CREATE A TEMPORARY TABLE TO ADD DATA TO
        sqlQuery = """CREATE TEMPORARY TABLE %(tmpTable)s SELECT * FROM %(dbTablename)s WHERE 1=0;""" % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

        # UNIQUE KEYS ACROSS ALL DICTIONARIES = COLUMNS WRITTEN TO THE CSV
        csvColumns = list(set(k for d in dictList for k in d))

        # A `dec` KEY IS ADDRESSED AS THE `decl` COLUMN IN THE DATABASE,
        # WHATEVER ITS POSITION IN THE COLUMN LIST
        # NOTE(review): presumably the table names the column `decl` because
        # DEC is a MySQL reserved word - confirm against the table schema
        dbColumns = ["decl" if c == "dec" else c for c in csvColumns]
        csvColumnsString = ', '.join(dbColumns)

        df = pd.DataFrame(dictList)
        # \N IS WRITTEN IN PLACE OF EMPTY / MISSING VALUES
        df.replace(['nan', 'None', '', 'NaN', np.nan], '\\N', inplace=True)
        df.to_csv(csvPath, sep="|", index=False, escapechar="\\",
                  quotechar='"', columns=csvColumns, encoding='utf-8')

        sqlQuery = """LOAD DATA LOCAL INFILE '%(csvPath)s'
INTO TABLE %(tmpTable)s
FIELDS TERMINATED BY '|' OPTIONALLY ENCLOSED BY '"'
IGNORE 1 LINES
(%(csvColumnsString)s);""" % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

        # SAME COLUMN NAMES AS THE LOAD STATEMENT ABOVE
        assignments = ["`%s` = VALUES(`%s`)" % (c, c) for c in dbColumns]
        if dateModified:
            assignments.append("dateLastModified = NOW(), updated = 1")
        updateStatement = ", ".join(assignments)

        sqlQuery = """
INSERT IGNORE INTO %(dbTablename)s
SELECT * FROM %(tmpTable)s
ON DUPLICATE KEY UPDATE %(updateStatement)s;""" % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

        sqlQuery = """DROP TEMPORARY TABLE %(tmpTable)s;""" % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )
    finally:
        # BEST-EFFORT REMOVAL OF THE CSV FILE, THEN ALWAYS CLOSE THE CONNECTION
        try:
            os.remove(csvPath)
        except OSError:
            pass
        dbConn.close()

    log.debug(
        'completed the ``_add_dictlist_to_database_via_load_in_file`` function')
    return None

455 

456# use the tab-trigger below for new function 

457# xt-def-function