Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2camcops_server/cc_modules/cc_proquint.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com). 

7 

8 This file is part of CamCOPS. 

9 

10 CamCOPS is free software: you can redistribute it and/or modify 

11 it under the terms of the GNU General Public License as published by 

12 the Free Software Foundation, either version 3 of the License, or 

13 (at your option) any later version. 

14 

15 CamCOPS is distributed in the hope that it will be useful, 

16 but WITHOUT ANY WARRANTY; without even the implied warranty of 

17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18 GNU General Public License for more details. 

19 

20 You should have received a copy of the GNU General Public License 

21 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

22 

23=============================================================================== 

24 

25Convert integers into Pronounceable Quintuplets (proquints) 

26https://arxiv.org/html/0901.4016 

27 

28Based on https://github.com/dsw/proquint, which has the following licence: 

29 

30--8<--------------------------------------------------------------------------- 

31 

32Copyright (c) 2009 Daniel S. Wilkerson 

33All rights reserved. 

34 

35Redistribution and use in source and binary forms, with or without 

36modification, are permitted provided that the following conditions are 

37met: 

38 

39 Redistributions of source code must retain the above copyright 

40 notice, this list of conditions and the following disclaimer. 

41 Redistributions in binary form must reproduce the above copyright 

42 notice, this list of conditions and the following disclaimer in 

43 the documentation and/or other materials provided with the 

44 distribution. 

45 

46 Neither the name of Daniel S. Wilkerson nor the names of its 

47 contributors may be used to endorse or promote products derived 

48 from this software without specific prior written permission. 

49 

50THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

51"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

52LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

53A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

54OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

55SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

56LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

57DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

58THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

59(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

60OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

61 

62--8<--------------------------------------------------------------------------- 

63 

64 

65""" 

66import uuid 

67 

# Proquint alphabets. A letter's position within its alphabet is the numeric
# value that the letter encodes.
CONSONANTS = "bdfghjklmnprstvz"
VOWELS = "aiou"

# Number of bits carried by one letter of each kind.
SIZE_OF_CONSONANT = 4
SIZE_OF_VOWEL = 2

# Reverse mappings (letter -> value), derived from the alphabets above so the
# two directions cannot drift apart.
LOOKUP_CONSONANTS = {
    letter: value for value, letter in enumerate(CONSONANTS)
}
LOOKUP_VOWELS = {
    letter: value for value, letter in enumerate(VOWELS)
}
# Combined mapping covering both alphabets (consonants first, then vowels).
LOOKUP_TABLE = {**LOOKUP_CONSONANTS, **LOOKUP_VOWELS}

86 

87 

class InvalidProquintException(Exception):
    """
    Raised when a proquint string cannot be decoded, e.g. because its check
    character does not match or a letter is invalid for its position.
    """

90 

91 

def proquint_from_uuid(uuid_obj: uuid.UUID) -> str:
    """
    Encode a UUID as a proquint string.

    The UUID is taken as its 128-bit integer value and passed to
    :func:`proquint_from_int`.

    Args:
        uuid_obj:
            UUID to encode

    Returns:
        proquint string identifier
    """
    uuid_size_in_bits = 128
    uuid_as_int = uuid_obj.int
    return proquint_from_int(uuid_as_int, uuid_size_in_bits)

97 

98 

def proquint_from_int(int_value: int,
                      size_in_bits: int) -> str:
    """
    Convert an integer value into a proquint with a trailing check character.

    .. code-block:: none

        >>> proquint_from_int(0x493b05ee, 32)
        'hohur-bilov-j'

        0x493b05ee in binary is:
        0100 1001 0011 1011 - 0000 0101 1110 1110

        grouped into alternating 4 and 2 bit values:

        cons vo cons vo cons - cons vo cons vo cons
        0100 10 0100 11 1011 - 0000 01 0111 10 1110

        h    o  h    u  r    - b    i  l    o  v

        The final "-j" is the check character; see
        _generate_check_character().

    Args:
        int_value:
            integer value to encode; must satisfy
            ``0 <= int_value < 2 ** size_in_bits``
        size_in_bits:
            size of integer in bits (must be a non-negative multiple of 16)

    Returns:
        proquint string identifier: one five-letter word per 16 bits, most
        significant word first, then a single check character, all joined
        with hyphens

    Raises:
        ValueError:
            if ``size_in_bits`` is negative or not a multiple of 16, or if
            ``int_value`` cannot be represented in ``size_in_bits`` bits
    """
    if size_in_bits % 16 != 0:
        raise ValueError(
            f"size_in_bits ({size_in_bits}) must be a multiple of 16"
        )

    if size_in_bits < 0:
        raise ValueError(
            f"size_in_bits ({size_in_bits}) must not be negative"
        )

    # Without this check, bits above size_in_bits (and the sign of a negative
    # value) would be dropped silently, so decoding the result would yield a
    # different number from the one passed in.
    if not 0 <= int_value < (1 << size_in_bits):
        raise ValueError(
            f"int_value ({int_value}) does not fit in {size_in_bits} bits"
        )

    # Peel off 16 bits at a time, least significant word first, then reverse
    # so that the most significant word leads the string.
    words = []
    remaining = int_value
    for _ in range(size_in_bits // 16):
        words.append(_proquint_from_int16(remaining & 0xffff))
        remaining >>= 16
    words.reverse()

    # The check character is computed over the letters only (no hyphens).
    words.append(_generate_check_character("".join(words)))

    return "-".join(words)

144 

145 

def _generate_check_character(proquint: str) -> str:
    """
    Return the Luhn mod 16 check character for a string of proquint letters.

    https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm

    Each letter is mapped to its code point (consonants 0x0-0xf, vowels
    0x0-0x3; see ``LOOKUP_CONSONANTS`` and ``LOOKUP_VOWELS``). Starting with
    the last letter and moving left, every other code point is doubled. The
    hex "digits" of the resulting values are then summed.

    .. code-block:: none

        Example (all in hex) for hohur-bilov:

        Character     h   o   h   u   r   b   i   l   o   v
        Code point    4   2   4   3   b   0   1   7   2   e
        Double            4       6       0       e      1c
        Reduce        4   4   4   6   b   0   1   e   2  1+c
        Sum           4   4   4   6   b   0   1   e   2   d

        Total sum = 4 + 4 + 4 + 6 + b + 0 + 1 + e + 2 + d = 0x3b
        Next multiple of 0x10 is 0x40

        Check character code = 0x40 - 0x3b = 0x5
        So check character is 'j'

    Args:
        proquint:
            proquint letters without hyphens and without a check character

    Returns:
        the check character, always a consonant
    """
    # Negating the remainder modulo 16 gives the amount needed to reach the
    # next multiple of 16 (and 0 when the sum is already a multiple).
    check_code_point = -_generate_luhn_mod_16_remainder(proquint, 2) % 16
    return CONSONANTS[check_code_point]

192 

193 

def _proquint_from_int16(int16_value: int) -> str:
    """
    Convert a 16-bit integer into a single five-letter proquint word.

    The word alternates consonant (4 bits) and vowel (2 bits), giving
    4 + 2 + 4 + 2 + 4 = 16 bits, with the most significant bits first.

    Args:
        int16_value:
            value to encode; only its lowest 16 bits are used

    Returns:
        five-letter word of the form consonant-vowel-consonant-vowel-consonant
    """
    # (alphabet, bit mask, bit width) for even and odd positions, counted
    # from the least significant end of the word.
    field_layout = (
        (CONSONANTS, 0xf, SIZE_OF_CONSONANT),
        (VOWELS, 0x3, SIZE_OF_VOWEL),
    )

    letters_lsb_first = []
    remaining = int16_value
    for position in range(5):
        alphabet, mask, width = field_layout[position % 2]
        letters_lsb_first.append(alphabet[remaining & mask])
        remaining >>= width

    # Letters were collected from the least significant end, so flip them.
    return ''.join(reversed(letters_lsb_first))

214 

215 

def uuid_from_proquint(proquint: str) -> uuid.UUID:
    """
    Decode a proquint string into a UUID.

    The string is decoded to an integer with :func:`int_from_proquint`, and
    that integer is used as the UUID's integer value.

    Args:
        proquint:
            string to decode

    Returns:
        the corresponding UUID
    """
    return uuid.UUID(int=int_from_proquint(proquint))

223 

224 

def int_from_proquint(proquint: str) -> int:
    """
    Convert a proquint string (including its check character) into an integer.

    .. code-block:: none

        >>> hex(int_from_proquint('hohur-bilov-j'))
        '0x493b05ee'

        h    o  h    u  r    - b    i  l    o  v
        0x4 0x2 0x4 0x3 0xb  - 0x0 0x1 0x7 0x2 0xe

        0100 10 0100 11 1011 - 0000 01 0111 10 1110
        0100 1001 0011 1011  - 0000 0101 1110 1110
        0x4  0x9  0x3  0xb   - 0x0  0x5  0xe  0xe

    Args:
        proquint:
            string to decode: hyphen-separated five-letter words followed by
            a single check character

    Returns:
        converted integer value

    Raises:
        InvalidProquintException:
            if the string contains a character outside the proquint
            alphabets, the check character does not match, the words are not
            five letters each followed by a single consonant check character,
            or a consonant/vowel is in the wrong position
    """
    words = proquint.split("-")

    try:
        checksum_ok = _is_valid_proquint("".join(words))
    except KeyError:
        # The checksum calculation raises KeyError for a character that is in
        # neither alphabet; report that as an invalid proquint instead of
        # leaking the KeyError to the caller.
        raise InvalidProquintException(
            f"'{proquint}' contains invalid or transposed characters"
        ) from None

    if not checksum_ok:
        raise InvalidProquintException(
            f"'{proquint}' is not valid (check character mismatch)"
        )

    # Remove check character
    check_word = words.pop()

    # Without this check an empty string, or a proquint typed without hyphens,
    # would have its only "word" discarded as the check character and would
    # silently decode to 0.
    if (check_word not in LOOKUP_CONSONANTS
            or any(len(word) != 5 for word in words)):
        raise InvalidProquintException(
            f"'{proquint}' is not valid (expected five-letter words followed "
            f"by a single check character)"
        )

    int_value = 0

    for word in words:
        for (i, c) in enumerate(word):
            # Odd positions within a word hold vowels, even positions
            # consonants.
            if i & 1:
                lookup_table = LOOKUP_VOWELS
                shift = SIZE_OF_VOWEL
            else:
                lookup_table = LOOKUP_CONSONANTS
                shift = SIZE_OF_CONSONANT

            value = lookup_table.get(c)

            if value is None:
                raise InvalidProquintException(
                    f"'{proquint}' contains invalid or transposed characters"
                )

            # Make room for this letter's bits, then add them at the low end.
            int_value <<= shift
            int_value += value

    return int_value

280 

281 

def _is_valid_proquint(proquint: str) -> bool:
    """
    Report whether a proquint passes its Luhn mod 16 check.

    Args:
        proquint:
            proquint letters including the trailing check character
            (presumably with hyphens already removed, as the only visible
            caller does)

    Returns:
        ``True`` if the overall remainder is zero
    """
    # Start with factor 1 so the check character itself is not doubled; the
    # letter before it then gets factor 2, matching how the check character
    # was generated.
    remainder = _generate_luhn_mod_16_remainder(proquint, 1)
    return remainder == 0

287 

288 

def _generate_luhn_mod_16_remainder(proquint: str, start_factor: int) -> int:
    """
    Compute the Luhn mod 16 remainder for a string of proquint letters.

    Part of the checksum calculations; see :func:`_generate_check_character`.
    For a valid sequence, the overall remainder should be 0.
    See https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm.

    Args:
        proquint:
            proquint letters to process; each must be a key of
            ``LOOKUP_TABLE``
        start_factor:
            multiplier applied to the last letter; the multiplier then
            alternates between 1 and 2 moving left

    Returns:
        sum of the base-16 digits of the weighted code points, modulo 16
    """
    total = 0
    multiplier = start_factor

    for letter in reversed(proquint):
        # Split the weighted code point into its two base-16 digits and add
        # both to the running total.
        high_digit, low_digit = divmod(LOOKUP_TABLE[letter] * multiplier, 16)
        total += high_digit + low_digit

        multiplier = 1 if multiplier == 2 else 2

    return total % 16