1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 """File and file-path manipulation utilities.
19
20 :group path manipulation: first_level_directory, relative_path, is_binary,\
21 get_by_ext, remove_dead_links
22 :group file manipulation: norm_read, norm_open, lines, stream_lines, lines,\
23 write_open_mode, ensure_fs_mode, export
24 :sort: path manipulation, file manipulation
25 """
26
27 from __future__ import print_function
28
29 __docformat__ = "restructuredtext en"
30
31 import io
32 import sys
33 import shutil
34 import mimetypes
35 from os.path import isabs, isdir, islink, split, exists, normpath, join
36 from os.path import abspath
37 from os import sep, mkdir, remove, listdir, stat, chmod, walk
38 from stat import ST_MODE, S_IWRITE
39
40 from logilab.common import STD_BLACKLIST as BASE_BLACKLIST, IGNORED_EXTENSIONS
41 from logilab.common.shellutils import find
42 from logilab.common.deprecation import deprecated
43 from logilab.common.compat import FileIO
44
def first_level_directory(path):
    """Return the first level directory of a path.

    >>> first_level_directory('home/syt/work')
    'home'
    >>> first_level_directory('/home/syt/work')
    '/'
    >>> first_level_directory('work')
    'work'
    >>>

    :type path: str
    :param path: the path for which we want the first level directory

    :rtype: str
    :return: the first level directory appearing in `path`
    """
    head, tail = split(path)
    # peel trailing components off until one of the two halves is empty
    while head and tail:
        head, tail = split(head)
    if tail:
        # relative path: the only component left is the first directory
        return tail
    # `tail` is empty: return what is left in `head` (the file-system root
    # when `path` was absolute)
    return head
69
# NOTE(review): the `def` line of this function was missing from the source;
# the name `abspath_listdir` is restored from upstream -- confirm.
def abspath_listdir(path):
    """List the content of `path` using absolute paths.

    :type path: str
    :param path: the directory to list

    :rtype: list
    :return:
      one absolute path per entry returned by `listdir`, in the same order
    """
    path = abspath(path)
    return [join(path, filename) for filename in listdir(path)]
74
75
def is_binary(filename):
    """Return true if `filename` may be a binary file, according to its
    extension.

    :type filename: str
    :param filename: the name of the file

    :rtype: bool
    :return:
      True if the file is a binary file (actually if its mime type
      doesn't begin with text/), or if no mime type can be guessed
    """
    mime_type = mimetypes.guess_type(filename)[0]
    if mime_type is None:
        # unknown extension: be conservative and treat the file as binary
        return True
    return not mime_type.startswith('text')
92
93
def write_open_mode(filename):
    """Return the write mode that should be used to open the file.

    :type filename: str
    :param filename: the name of the file

    :rtype: str
    :return: the mode that should be used to open the file ('w' or 'wb')
    """
    # binary content (as guessed by `is_binary`) must not go through text mode
    return 'wb' if is_binary(filename) else 'w'
106
107
# NOTE(review): the `def` line was missing from the source; the S_IWRITE
# default is an assumption (it only widens the interface) -- confirm.
def ensure_fs_mode(filepath, desired_mode=S_IWRITE):
    """Check that the given file has the given mode(s) set, else try to
    set them.

    :type filepath: str
    :param filepath: path of the file

    :type desired_mode: int
    :param desired_mode:
      ORed flags describing the desired mode. Use constants from the
      `stat` module for file permission's modes
    """
    mode = stat(filepath)[ST_MODE]
    # every requested flag must be present: testing `not mode & desired_mode`
    # would skip the chmod as soon as a single one of them is already set
    if (mode & desired_mode) != desired_mode:
        chmod(filepath, mode | desired_mode)
123
124
125
# NOTE(review): the `class` and `def` lines were missing from the source.
# They are restored from the calls visible in the bodies (`FileIO.__init__`,
# `self._restore_mode()`, `self.close()`); `__del__` is inferred -- confirm.
class ProtectedFile(FileIO):
    """A special file-object class that automatically does a 'chmod +w' when
    needed.

    XXX: for now, the way it is done allows 'normal file-objects' to be
    created during the ProtectedFile object lifetime.
    One way to circumvent this would be to chmod / unchmod on each
    write operation.

    One other way would be to :

    - catch the IOError in the __init__

    - if IOError, then create a StringIO object

    - each write operation writes in this StringIO object

    - on close()/del(), write/append the StringIO content to the file and
      do the chmod only once
    """

    def __init__(self, filepath, mode):
        """Open `filepath` with `mode`, making it writable first if needed.

        :type filepath: str
        :param filepath: path of an existing file

        :type mode: str
        :param mode: opening mode, handed over to `FileIO`
        """
        self.original_mode = stat(filepath)[ST_MODE]
        self.mode_changed = False
        # 'w', 'a' and any '+' variant need write permission on the file
        wants_write = any(flag in mode for flag in 'wa+')
        if wants_write and not self.original_mode & S_IWRITE:
            chmod(filepath, self.original_mode | S_IWRITE)
            self.mode_changed = True
        try:
            FileIO.__init__(self, filepath, mode)
        except Exception:
            # do not leave the file writable when it could not be opened;
            # restore by path since `self.name` may not be set at this point
            if self.mode_changed:
                chmod(filepath, self.original_mode)
                self.mode_changed = False
            raise

    def _restore_mode(self):
        """restores the original mode if needed"""
        if self.mode_changed:
            chmod(self.name, self.original_mode)
            # Don't re-chmod in case of several restore
            self.mode_changed = False

    def close(self):
        """restore mode before closing"""
        self._restore_mode()
        FileIO.close(self)

    def __del__(self):
        """close the file (hence restore its mode) if still open"""
        if not self.closed:
            self.close()
170
171
# NOTE(review): the `class` line was missing from the source; `Exception` is
# assumed as base class since the type is raised by `relative_path` -- confirm.
class UnresolvableError(Exception):
    """Exception raised by `relative_path` when it's unable to compute a
    relative path between two paths.
    """
176
def relative_path(from_file, to_file):
    """Try to get a relative path from `from_file` to `to_file`
    (path will be absolute if to_file is an absolute file). This function
    is useful to create link in `from_file` to `to_file`. This typical use
    case is used in this function description.

    If both files are relative, they're expected to be relative to the same
    directory.

    >>> relative_path( from_file='toto/index.html', to_file='index.html')
    '../index.html'
    >>> relative_path( from_file='index.html', to_file='toto/index.html')
    'toto/index.html'
    >>> relative_path( from_file='tutu/index.html', to_file='toto/index.html')
    '../toto/index.html'
    >>> relative_path( from_file='toto/index.html', to_file='/index.html')
    '/index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/index.html')
    '../index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/toto/summary.html')
    'summary.html'
    >>> relative_path( from_file='index.html', to_file='index.html')
    ''
    >>> relative_path( from_file='/index.html', to_file='toto/index.html')
    Traceback (most recent call last):
      File "<string>", line 1, in ?
      File "<stdin>", line 37, in relative_path
    UnresolvableError
    >>> relative_path( from_file='/index.html', to_file='/index.html')
    ''
    >>>

    :type from_file: str
    :param from_file: source file (where links will be inserted)

    :type to_file: str
    :param to_file: target file (on which links point)

    :raise UnresolvableError: if it has been unable to guess a correct path

    :rtype: str
    :return: the relative path of `to_file` from `from_file`
    """
    from_file = normpath(from_file)
    to_file = normpath(to_file)
    if from_file == to_file:
        return ''
    if isabs(to_file):
        if not isabs(from_file):
            # relative source, absolute target: only the absolute path works
            return to_file
    elif isabs(from_file):
        # absolute source, relative target: no common base to start from
        raise UnresolvableError()
    # only the directories of `from_file` matter, not its file name
    from_dirs = from_file.split(sep)[:-1]
    to_parts = to_file.split(sep)
    # count the leading directories shared by both paths; the last component
    # of `to_file` (the file name) is never consumed
    limit = min(len(from_dirs), len(to_parts) - 1)
    common = 0
    while common < limit and from_dirs[common] == to_parts[common]:
        common += 1
    # climb out of every non-shared source directory, then go down to target
    result = ['..'] * (len(from_dirs) - common) + to_parts[common:]
    return sep.join(result)
242
243
def norm_read(path):
    """Return the content of the file with normalized line feeds.

    :type path: str
    :param path: path to the file to read

    :rtype: str
    :return: the content of the file with normalized line feeds
    """
    # the 'U' flag is rejected by Python >= 3.11; Python 3 text mode already
    # translates line feeds, so only Python 2 still needs it
    mode = 'U' if sys.version_info < (3,) else 'r'
    # close the file deterministically instead of leaking the handle
    with open(path, mode) as stream:
        return stream.read()
norm_read = deprecated("use \"open(path, 'U').read()\"")(norm_read)
255
def norm_open(path):
    """Return a stream for a file with content with normalized line feeds.

    The caller is responsible for closing the returned stream.

    :type path: str
    :param path: path to the file to open

    :rtype: file or StringIO
    :return: the opened file with normalized line feeds
    """
    # the 'U' flag is rejected by Python >= 3.11; Python 3 text mode already
    # translates line feeds, so only Python 2 still needs it
    mode = 'U' if sys.version_info < (3,) else 'r'
    return open(path, mode)
norm_open = deprecated("use \"open(path, 'U')\"")(norm_open)
267
def lines(path, comments=None):
    """Return a list of non empty lines in the file located at `path`.

    :type path: str
    :param path: path to the file

    :type comments: str or None
    :param comments:
      optional string which can be used to comment a line in the file
      (i.e. lines starting with this string won't be returned)

    :rtype: list
    :return:
      a list of stripped line in the file, without empty and commented
      lines

    :warning: at some point this function will probably return an iterator
    """
    # the filtering itself is delegated to `stream_lines`; the file is closed
    # as soon as the list has been built
    with io.open(path) as stream:
        return stream_lines(stream, comments)
288
289
def stream_lines(stream, comments=None):
    """Return a list of non empty lines in the given `stream`.

    :type stream: object implementing 'xreadlines' or 'readlines'
    :param stream: file like object

    :type comments: str or None
    :param comments:
      optional string which can be used to comment a line in the file
      (i.e. lines starting with this string won't be returned)

    :rtype: list
    :return:
      a list of stripped line in the file, without empty and commented
      lines

    :warning: at some point this function will probably return an iterator
    """
    # use `xreadlines` when the stream provides it, else `readlines`
    try:
        readlines = stream.xreadlines
    except AttributeError:
        readlines = stream.readlines
    stripped = (line.strip() for line in readlines())
    # the comment test is done on the stripped line, so indented comments
    # are dropped as well
    return [line for line in stripped
            if line and (comments is None or not line.startswith(comments))]
318
319
# NOTE(review): the `def` lines were missing from the source; the signature
# below is restored from the parameters and defaults listed in the
# docstring -- confirm.
def export(from_dir, to_dir,
           blacklist=BASE_BLACKLIST, ignore_ext=IGNORED_EXTENSIONS,
           verbose=False):
    """Make a mirror of `from_dir` in `to_dir`, omitting directories
    listed in the black list and files ending with one of the given
    extensions.

    :type from_dir: str
    :param from_dir: directory to export

    :type to_dir: str
    :param to_dir: destination directory

    :type blacklist: list or tuple
    :param blacklist:
      list of directory names to ignore, default to the content of
      `BASE_BLACKLIST`

    :type ignore_ext: list or tuple
    :param ignore_ext:
      list of extensions to ignore, default to the content of
      `IGNORED_EXTENSIONS`

    :type verbose: bool
    :param verbose:
      flag indicating whether information about exported files should be
      printed to stderr, default to False
    """
    def mirror(src):
        # Drop the `from_dir` prefix and re-root the remainder under `to_dir`.
        # Stripping the leading separator makes this work whether or not
        # `from_dir` ends with one (plain concatenation would otherwise glue
        # the entry name onto `to_dir`).
        return join(to_dir, src[len(from_dir):].lstrip(sep))

    try:
        mkdir(to_dir)
    except OSError:
        # best effort: presumably the destination already exists
        pass
    skipped_dirs = tuple(blacklist)
    ignored_suffixes = tuple(ignore_ext)
    for directory, dirnames, filenames in walk(from_dir):
        # prune in place so that `walk` doesn't descend into black-listed
        # directories
        dirnames[:] = [name for name in dirnames if name not in skipped_dirs]
        for dirname in dirnames:
            src = join(directory, dirname)
            dest = mirror(src)
            if isdir(src) and not exists(dest):
                mkdir(dest)
        for filename in filenames:
            if filename.endswith(ignored_suffixes):
                continue
            src = join(directory, filename)
            dest = mirror(src)
            if verbose:
                print(src, '->', dest, file=sys.stderr)
            # replace any previously exported copy
            if exists(dest):
                remove(dest)
            shutil.copy2(src, dest)
377
378
def remove_dead_links(directory, verbose=False):
    """Recursively traverse directory and remove all dead links.

    :type directory: str
    :param directory: directory to cleanup

    :type verbose: bool
    :param verbose:
      flag indicating whether information about deleted links should be
      printed to stderr, default to False
    """
    for dirpath, dirnames, filenames in walk(directory):
        for filename in dirnames + filenames:
            src = join(dirpath, filename)
            # a link whose target is missing: `exists` follows the link
            if islink(src) and not exists(src):
                if verbose:
                    print('remove dead link', src, file=sys.stderr)
                remove(src)
397