Changesets can be listed by changeset number.
The Git repository is here.
- Revision:
- 275
- Log:
Add MD5 tool used to update Radiant configuration files
with checksum data.
- Author:
- rool
- Date:
- Fri Feb 12 14:14:55 +0000 2010
- Size:
- 8214 Bytes
1 | #!/bin/ruby |
2 | |
3 | ######################################################################## |
4 | # File:: make-md5.rb |
5 | # (C):: Hipposoft 2010 |
6 | # |
7 | # Purpose:: Calculate new or updated MD5 checksums for files. |
8 | # ---------------------------------------------------------------------- |
9 | # 11-Feb-2010 (ADH): Created. |
10 | ######################################################################## |
11 | |
# Print usage instructions and exit if the script is invoked without any
# arguments at all.
if ( ARGV.size == 0 )
  puts
  puts "Usage:"
  puts "ruby make-md5.rb <file-or-dir> [<file-or-dir> ...]"
  puts
  puts "Given one or more directories or files, scans the directories for"
  puts "files without recursion and/or loads the file(s) and calculates MD5"
  puts "checksums for them. A YAML configuration file at 'config/config.yml'"
  puts "inside each directory given, or the directory containing each file"
  puts "given, must be present in a format appropriate for the ROOL extended"
  puts "Radiant directory listing parser."
  puts
  puts "Configuration file entries are found by looking up the YAML hash with"
  puts "a key based on:"
  puts
  puts " * The leaf name up to but excluding the first '.' in the name"
  puts " * The entire leaf name"
  puts
  puts "...in that order. Key lookup is case sensitive. The value found (if"
  puts "any) is itself a hash into which keys 'md5' and 'md5_time' will be"
  puts "added. A new entry in the configuration file consisting of just the"
  puts "MD5 data will be added should there be no existing entry (a warning"
  puts "is printed to stdout in such cases)."
  puts
  puts "If MD5 data is already present then the checksum will only be"
  puts "computed if 'md5_time' is older than the file datestamp at the time"
  puts "the script runs."
  puts
  exit()
end
42 | |
# Load data in chunks of this many bytes for MD5 calculation.
#
# NOTE(review): 1024768 is an unusual chunk size - possibly 1048576 (1 MiB)
# was intended - but any positive size is functionally correct here, so the
# value is left unchanged.

BUFFER_SIZE = 1024768

# External dependencies

require 'find'
require 'yaml'
require 'digest/md5'
52 | |
# =============================================================================
# Return an MD5 checksum in hex form for the file at the given pathname.
# Reads in chunks of BUFFER_SIZE bytes to avoid excessive RAM penalties for
# large files. Prints a progress message and dots to stdout while it works.
# =============================================================================
#
def calculate_md5_checksum( path )

  # Distantly based upon:
  #
  #   http://snippets.dzone.com/posts/show/3349

  md5 = Digest::MD5.new

  # Open in binary mode; text mode would corrupt checksums on platforms
  # which translate line endings (e.g. Windows).

  File.open( path, 'rb' ) do | io |
    print "Calculating MD5 checksum for file '#{ File.basename( path ) }'"
    counter = 0

    # Read a chunk at a time, printing a progress dot for every third chunk.

    while ( ! io.eof? )
      putc '.' if ( ( counter += 1 ) % 3 == 0 )
      buffer = io.readpartial( BUFFER_SIZE )
      md5.update( buffer )
    end

    puts
  end

  return md5.hexdigest
end
82 | |
# =============================================================================
# Return a hash describing a file at a given pathname. Keys:
#
#   :path      - the pathname passed in
#   :leaf      - leaf name (File.basename)
#   :base_name - leaf name up to but excluding the first '.'
#   :version   - dotted digit sequence following the base name, without any
#                trailing '.' (e.g. "1.2"), or '' if none present
#   :filetype  - remainder of the leaf after any version (e.g. "txt")
#   :mod       - File.mtime for the file
#   :link      - pathname with its first leaf-length characters removed,
#                plus a "?<seconds-since-epoch>" cache-busting suffix
# =============================================================================
#
def get_file_description( path )

  # Parser: Various charaters, a dot, then: one or more digits (0-9)
  # followed by an optional dot, repeated at least once, this whole
  # assembly optional, recording only the collection of digits and
  # dots, not individual digits-plus-dots sets ("(?:" => don't include
  # this group in the match data). Then zero or more other characters,
  # non-greedy.

  mod       = File.mtime( path )
  leaf      = File.basename( path )
  regexp    = /^(.*?)\.((?:[0-9]+\.?)+)?(.*?)$/
  scanned   = leaf.scan( regexp )[ 0 ]
  base_name = scanned[ 0 ]
  version   = ( scanned[ 1 ] || '' ).chomp( '.' ) # May have trailing '.'
  filetype  = scanned[ 2 ] || ''

  return {
    :path      => path,
    :leaf      => leaf,
    :base_name => base_name,

    # NOTE(review): "path[ leaf.length..-1 ]" strips the first leaf-length
    # characters from the *front* of the path, which looks suspicious -
    # perhaps the directory part or the leaf itself was intended. Preserved
    # as-is; verify against the Radiant listing parser.

    :link      => "#{ path[ leaf.length..-1 ] }?#{ mod.tv_sec }",
    :mod       => mod, # Reuse the mtime read above rather than re-stat'ing,
                       # so :mod and :link always agree
    :filetype  => filetype,
    :version   => version
  }
end
115 | |
# =============================================================================
# Return an array of items describing a directory contents and optionally
# the contents of any subdirectories as a flat unsorted list. The last
# parameter is 'false' to avoid scanning to a level beyond the current
# directory. The first two parameters are concatenated to generate the
# path of the directory to scan.
# =============================================================================
#
def recursive_directory_list( base, dir, recurse = true )

  # Distantly based upon:
  #
  #   http://www.oreillynet.com/onjava/blog/2006/03/recursive_directory_list_with.html

  skip_names = [ 'CVS', '.svn' ]
  results    = []
  root       = File.join( base, dir )
  seen_root  = false

  Find.find( root ) do | entry |

    # Plain files are always described and collected.

    unless ( FileTest.directory?( entry ) )
      results.push( get_file_description( entry ) )
      next
    end

    if ( recurse )

      # When recursing, descend into everything except excluded names.

      Find.prune if ( skip_names.include?( File.basename( entry ) ) )

    else

      # When not recursing, pass through the top-level directory itself
      # (the first directory Find yields) but prune everything deeper.

      if ( seen_root )
        Find.prune
      else
        seen_root = true
      end

    end
  end

  return results
end
158 | |
# =============================================================================
# Main processing loop.
#
# Each command line argument is handled independently: build a collection of
# file descriptions, look each file up in that directory's config.yml,
# recompute MD5 data where missing or stale, then rewrite the configuration
# file (sorted, grouped) if anything changed.
# =============================================================================

ARGV.each do | path |

  # Either read the contents of a given directory without recursion and look
  # for a configuration folder inside it, or read a file and look for a
  # configuration folder inside the directory in which the file resides.

  puts "Processing '#{ path }'..."

  if ( File.directory?( path ) )
    collection = recursive_directory_list( path, '', false )
  else
    collection = [ get_file_description( path ) ]
    path = File.dirname( path ) # From here on, 'path' is the containing dir
  end

  config_path = File.join( path, 'config', 'config.yml' )

  puts "Using '#{ config_path }'"

  # The configuration file is trusted local data, so a plain YAML load is
  # acceptable here. Raises if the file is missing.

  config_data = YAML.load_file( config_path )
  config_changed = false

  for description in collection
    # Note 'path' is reassigned again here, now to the individual file's path.
    path = description[ :path ]
    leaf = description[ :leaf ]
    base_name = description[ :base_name ]
    last_mod = description[ :mod ]

    # Look up by base name first, then by full leaf name (see usage text).

    info = config_data[ base_name ] || config_data[ leaf ]

    if ( info.nil? )
      puts "WARNING: File '#{ leaf }' has no entry in config.yml"

      # Create a minimal entry keyed by base name; it will receive only the
      # MD5 fields below.

      info = {}
      config_data[ base_name ] = info
    end

    # Recompute when MD5 data is absent or older than the file's mtime.
    # 'info' is a reference into config_data, so these writes mutate it.

    if ( info[ 'md5' ].nil? || info[ 'md5_time' ].nil? || info[ 'md5_time' ] < last_mod )
      info[ 'md5_time' ] = last_mod
      info[ 'md5' ] = calculate_md5_checksum( path )
      config_changed = true
    else
      puts "Checksum for file '#{ leaf }' is up to date."
    end
  end

  if ( config_changed )
    puts "Saving '#{ config_path }'"

    sorted_data = config_data.sort do | a, b |

      # Sorted hashes get converted to nested arrays of key/value pairs, so
      # in our case we get a file leaf/base name at index 0 and the data hash
      # at index 1 of each inner pair. Sort by group then base name.

      ( ( a[ 1 ][ 'group' ] || '' ) <=> ( b[ 1 ][ 'group' ] || '' ) ).nonzero? ||
      ( a[ 0 ] <=> b[ 0 ] )

    end

    # "YAML.dump" provides no sorting mechanism. Ruby 1.9 should help since
    # hash ordering is well defined, but for Ruby 1.8 with undefined order of
    # hash key enumeration, do it the hard way. Otherwise, the unsorted YAML
    # output is hard for humans to read or hand-modify.

    File.open( config_path, 'w' ) do | io |

      # Write the file header.

      io << "---\n"

      # Each individual YAML string is returned as an individual file and has
      # a file header line; output everything except this line.

      old_group = nil

      for pair in sorted_data
        base_name = pair[ 0 ]
        data = pair[ 1 ]
        group = data[ 'group' ] || ''

        # Emit a comment banner each time the (sorted) group changes.

        if ( old_group != group )
          io << "\n\# #{ ( group.empty? ) ? 'Ungrouped' : group }\n\n"
          old_group = group
        end

        # Serialise one entry, stripping the leading "---" document header
        # line which to_yaml adds to every document.

        io << ( { base_name => data } ).to_yaml.sub( /.*?\n/, '' )
      end
    end
  end

  puts "...Processing complete."

end

puts "Finished."