Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2# cardinal_pythonlib/network.py 

3 

4""" 

5=============================================================================== 

6 

7 Original code copyright (C) 2009-2021 Rudolf Cardinal (rudolf@pobox.com). 

8 

9 This file is part of cardinal_pythonlib. 

10 

11 Licensed under the Apache License, Version 2.0 (the "License"); 

12 you may not use this file except in compliance with the License. 

13 You may obtain a copy of the License at 

14 

15 https://www.apache.org/licenses/LICENSE-2.0 

16 

17 Unless required by applicable law or agreed to in writing, software 

18 distributed under the License is distributed on an "AS IS" BASIS, 

19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

20 See the License for the specific language governing permissions and 

21 limitations under the License. 

22 

23=============================================================================== 

24 

25**Network support functions.** 

26 

27NOTES: 

28 

29- ``ping`` requires root authority to create ICMP sockets in Linux 

30- the ``/bin/ping`` command doesn't need prior root authority (because it has 

31 the setuid bit set) 

32- For Linux, it's therefore best to use the system ``ping``. 

33 

34https://stackoverflow.com/questions/2953462/pinging-servers-in-python 

35https://stackoverflow.com/questions/316866/ping-a-site-in-python 

36 

37- Note that if you want a sub-second timeout, things get trickier. 

38 One option is ``fping``. 

39 

40""" 

41 

42import os 

43import ssl 

44import subprocess 

45import sys 

46import tempfile 

47from typing import BinaryIO, Dict, Generator, Iterable 

48import urllib.request 

49 

50from cardinal_pythonlib.logs import get_brace_style_log_with_null_handler 

51 

52log = get_brace_style_log_with_null_handler(__name__) 

53 

54 

55# ============================================================================= 

56# Ping 

57# ============================================================================= 

58 

def ping(hostname: str, timeout_s: int = 5) -> bool:
    """
    Sends a single ping to a host via the operating system's ``ping``
    command.

    Args:
        hostname: host name or IP address to ping
        timeout_s: timeout in seconds

    Returns:
        ``True`` if the ``ping`` process exited with status zero (success);
        ``False`` for any non-zero exit status

    Raises:
        AssertionError: if the platform is neither Windows (``win32``) nor
            Linux

    """
    platform = sys.platform
    if platform == "win32":
        # Windows takes the ping count via "-n" and the timeout ("-w") in
        # milliseconds.
        count_flag = "-n"
        timeout_value = str(timeout_s * 1000)
    elif platform.startswith('linux'):
        # Linux takes the ping count via "-c" and the timeout ("-w") in
        # seconds.
        count_flag = "-c"
        timeout_value = str(timeout_s)
    else:
        raise AssertionError("Don't know how to ping on this operating system")

    cmdargs = ["ping", hostname, count_flag, "1", "-w", timeout_value]
    # Capture (and discard) the command's output; only the exit status
    # matters here.
    process = subprocess.Popen(cmdargs,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    process.communicate()
    return process.returncode == 0

93 

94 

95# ============================================================================= 

96# Download things 

97# ============================================================================= 

98 

def download(url: str,
             filename: str,
             skip_cert_verify: bool = True,
             headers: Dict[str, str] = None) -> None:
    """
    Downloads a URL to a file.

    The response body is streamed to the file in chunks, so the whole
    download never has to be held in memory at once.

    Args:
        url:
            URL to download from
        filename:
            file to save to (opened for binary writing, so any existing
            content is replaced)
        skip_cert_verify:
            skip SSL certificate check? NOTE: this defaults to ``True``, so
            certificates and host names are NOT verified unless the caller
            passes ``False``.
        headers:
            request headers (if not specified, a default will be used that
            mimics Mozilla 5.0 to avoid certain HTTP 403 errors)
    """
    # Function-scope import, so this function does not depend on the
    # module's top-level import list.
    import shutil

    if headers is None:
        headers = {'User-Agent': 'Mozilla/5.0'}
    log.info("Downloading from {} to {}", url, filename)

    # urllib.request.urlretrieve(url, filename)
    # ... sometimes fails (e.g. downloading
    # https://www.openssl.org/source/openssl-1.1.0g.tar.gz under Windows) with:
    # ssl.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:777)  # noqa
    # ... due to this certificate root problem (probably because OpenSSL
    # [used by Python] doesn't play entirely by the same rules as others?):
    # https://stackoverflow.com/questions/27804710
    # So we build our own SSL context, optionally with verification disabled.

    # The User-Agent request header fakes a browser request, using this as an
    # example:
    # https://stackoverflow.com/questions/42863240/how-to-get-round-the-http-error-403-forbidden-with-urllib-request-using-python  # noqa

    ctx = ssl.create_default_context()  # type: ssl.SSLContext
    if skip_cert_verify:
        log.debug("Skipping SSL certificate check for {}", url)
        # check_hostname must be switched off before verify_mode can be set
        # to CERT_NONE.
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
    page = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(page, context=ctx) as u, \
            open(filename, 'wb') as f:
        # Copy in chunks rather than f.write(u.read()), which would buffer
        # the entire response body in memory first.
        shutil.copyfileobj(u, f)

144 

145 

146# ============================================================================= 

147# Generators 

148# ============================================================================= 

149 

def gen_binary_files_from_urls(
        urls: Iterable[str],
        on_disk: bool = False,
        show_info: bool = True) -> Generator[BinaryIO, None, None]:
    """
    Yield one open binary file per URL, fetching each URL in turn.

    Each file is only open while the consumer is handling it: once the
    generator is advanced, the file is closed and, for on-disk files, its
    temporary directory is removed.

    Args:
        urls: iterable of URLs
        on_disk: if ``True``, each URL is first downloaded into a temporary
            directory and the file yielded is that on-disk copy (permitting
            random access, e.g. for zip processing); if ``False``, the open
            URL response is yielded directly (which will not permit random
            access)
        show_info: show progress to the log?

    Yields:
        files, each of type :class:`BinaryIO`

    """
    for url in urls:
        if not on_disk:
            if show_info:
                log.info("Reading from URL: {}", url)
            with urllib.request.urlopen(url) as response:
                yield response
        else:
            # A real file on disk is necessary for e.g. zip processing
            # (random access).
            with tempfile.TemporaryDirectory() as scratch_dir:
                local_path = os.path.join(scratch_dir, "tempfile")
                download(url=url, filename=local_path)
                with open(local_path, 'rb') as disk_file:
                    yield disk_file
        if show_info:
            log.info("... finished reading from URL: {}", url)