Changesets can be listed by changeset number.
The Git repository is here.
- Revision:
- 275
- Log:
Add MD5 tool used to update Radiant configuration files
with checksum data.
- Author:
- rool
- Date:
- Fri Feb 12 14:14:55 +0000 2010
- Size:
- 8214 Bytes
1 | #!/bin/ruby |
2 | |
3 | ######################################################################## |
4 | # File:: make-md5.rb |
5 | # (C):: Hipposoft 2010 |
6 | # |
7 | # Purpose:: Calculate new or updated MD5 checksums for files. |
8 | # ---------------------------------------------------------------------- |
9 | # 11-Feb-2010 (ADH): Created. |
10 | ######################################################################## |
11 | |
# Print usage instructions and exit if the script is invoked without any
# arguments at all.
if ( ARGV.size == 0 )
  puts
  puts "Usage:"
  puts "ruby make-md5.rb <file-or-dir> [<file-or-dir> ...]"
  puts
  puts "Given one or more directories or files, scans the directories for"
  puts "files without recursion and/or loads the file(s) and calculates MD5"
  puts "checksums for them. A YAML configuration file at 'config/config.yml'"
  puts "inside each directory given, or the directory containing each file"
  puts "given, must be present in a format appropriate for the ROOL extended"
  puts "Radiant directory listing parser."
  puts
  puts "Configuration file entries are found by looking up the YAML hash with"
  puts "a key based on:"
  puts
  puts " * The leaf name up to but excluding the first '.' in the name"
  puts " * The entire leaf name"
  puts
  puts "...in that order. Key lookup is case sensitive. The value found (if"
  puts "any) is itself a hash into which keys 'md5' and 'md5_time' will be"
  puts "added. A new entry in the configuration file consisting of just the"
  puts "MD5 data will be added should there be no existing entry (a warning"
  puts "is printed to stdout in such cases)."
  puts
  puts "If MD5 data is already present then the checksum will only be"
  puts "computed if 'md5_time' is older than the file datestamp at the time"
  puts "the script runs."
  puts
  exit()
end
42 | |
# Load data in chunks of this many bytes for MD5 calculation.
#
# NOTE(review): 1024768 is an unusual chunk size - possibly 1048576 (1 MiB)
# was intended - but any positive size is functionally correct here, so the
# value is left unchanged.

BUFFER_SIZE = 1024768

# External dependencies

require 'find'
require 'yaml'
require 'digest/md5'
52 | |
# =============================================================================
# Return an MD5 checksum in hex form for the file at the given pathname.
# Reads in chunks of BUFFER_SIZE bytes to avoid excessive RAM penalties for
# large files. Prints a progress message and dots to stdout while it works.
# =============================================================================
#
def calculate_md5_checksum( path )

  # Distantly based upon:
  #
  #   http://snippets.dzone.com/posts/show/3349

  md5 = Digest::MD5.new

  # Open in binary mode; text mode would corrupt checksums on platforms
  # which translate line endings (e.g. Windows).

  File.open( path, 'rb' ) do | io |
    print "Calculating MD5 checksum for file '#{ File.basename( path ) }'"
    counter = 0

    # Read a chunk at a time, printing a progress dot for every third chunk.

    while ( ! io.eof? )
      putc '.' if ( ( counter += 1 ) % 3 == 0 )
      buffer = io.readpartial( BUFFER_SIZE )
      md5.update( buffer )
    end

    puts
  end

  return md5.hexdigest
end
82 | |
# =============================================================================
# Return a hash describing a file at a given pathname. Keys:
#
#   :path      - the pathname passed in
#   :leaf      - leaf name (File.basename)
#   :base_name - leaf name up to but excluding the first '.'
#   :version   - dotted digit sequence following the base name, without any
#                trailing '.' (e.g. "1.2"), or '' if none present
#   :filetype  - remainder of the leaf after any version (e.g. "txt")
#   :mod       - File.mtime for the file
#   :link      - pathname with its first leaf-length characters removed,
#                plus a "?<seconds-since-epoch>" cache-busting suffix
# =============================================================================
#
def get_file_description( path )

  # Parser: Various charaters, a dot, then: one or more digits (0-9)
  # followed by an optional dot, repeated at least once, this whole
  # assembly optional, recording only the collection of digits and
  # dots, not individual digits-plus-dots sets ("(?:" => don't include
  # this group in the match data). Then zero or more other characters,
  # non-greedy.

  mod       = File.mtime( path )
  leaf      = File.basename( path )
  regexp    = /^(.*?)\.((?:[0-9]+\.?)+)?(.*?)$/
  scanned   = leaf.scan( regexp )[ 0 ]
  base_name = scanned[ 0 ]
  version   = ( scanned[ 1 ] || '' ).chomp( '.' ) # May have trailing '.'
  filetype  = scanned[ 2 ] || ''

  return {
    :path      => path,
    :leaf      => leaf,
    :base_name => base_name,

    # NOTE(review): "path[ leaf.length..-1 ]" strips the first leaf-length
    # characters from the *front* of the path, which looks suspicious -
    # perhaps the directory part or the leaf itself was intended. Preserved
    # as-is; verify against the Radiant listing parser.

    :link      => "#{ path[ leaf.length..-1 ] }?#{ mod.tv_sec }",
    :mod       => mod, # Reuse the mtime read above rather than re-stat'ing,
                       # so :mod and :link always agree
    :filetype  => filetype,
    :version   => version
  }
end
115 | |
# =============================================================================
# Return an array of items describing a directory contents and optionally
# the contents of any subdirectories as a flat unsorted list. The last
# parameter is 'false' to avoid scanning to a level beyond the current
# directory. The first two parameters are concatenated to generate the
# path of the directory to scan.
# =============================================================================
#
def recursive_directory_list( base, dir, recurse = true )

  # Distantly based upon:
  #
  #   http://www.oreillynet.com/onjava/blog/2006/03/recursive_directory_list_with.html

  skip_names = [ 'CVS', '.svn' ]
  results    = []
  root       = File.join( base, dir )
  seen_root  = false

  Find.find( root ) do | entry |

    # Plain files are always described and collected.

    unless ( FileTest.directory?( entry ) )
      results.push( get_file_description( entry ) )
      next
    end

    if ( recurse )

      # When recursing, descend into everything except excluded names.

      Find.prune if ( skip_names.include?( File.basename( entry ) ) )

    else

      # When not recursing, pass through the top-level directory itself
      # (the first directory Find yields) but prune everything deeper.

      if ( seen_root )
        Find.prune
      else
        seen_root = true
      end

    end
  end

  return results
end
158 | |
# =============================================================================
# Main processing loop.
#
# Each command line argument is handled independently: build a collection of
# file descriptions, look each file up in that directory's config.yml,
# recompute MD5 data where missing or stale, then rewrite the configuration
# file (sorted, grouped) if anything changed.
# =============================================================================

ARGV.each do | path |

  # Either read the contents of a given directory without recursion and look
  # for a configuration folder inside it, or read a file and look for a
  # configuration folder inside the directory in which the file resides.

  puts "Processing '#{ path }'..."

  if ( File.directory?( path ) )
    collection = recursive_directory_list( path, '', false )
  else
    collection = [ get_file_description( path ) ]
    path = File.dirname( path ) # From here on, 'path' is the containing dir
  end

  config_path = File.join( path, 'config', 'config.yml' )

  puts "Using '#{ config_path }'"

  # The configuration file is trusted local data, so a plain YAML load is
  # acceptable here. Raises if the file is missing.

  config_data = YAML.load_file( config_path )
  config_changed = false

  for description in collection
    # Note 'path' is reassigned again here, now to the individual file's path.
    path = description[ :path ]
    leaf = description[ :leaf ]
    base_name = description[ :base_name ]
    last_mod = description[ :mod ]

    # Look up by base name first, then by full leaf name (see usage text).

    info = config_data[ base_name ] || config_data[ leaf ]

    if ( info.nil? )
      puts "WARNING: File '#{ leaf }' has no entry in config.yml"

      # Create a minimal entry keyed by base name; it will receive only the
      # MD5 fields below.

      info = {}
      config_data[ base_name ] = info
    end

    # Recompute when MD5 data is absent or older than the file's mtime.
    # 'info' is a reference into config_data, so these writes mutate it.

    if ( info[ 'md5' ].nil? || info[ 'md5_time' ].nil? || info[ 'md5_time' ] < last_mod )
      info[ 'md5_time' ] = last_mod
      info[ 'md5' ] = calculate_md5_checksum( path )
      config_changed = true
    else
      puts "Checksum for file '#{ leaf }' is up to date."
    end
  end

  if ( config_changed )
    puts "Saving '#{ config_path }'"

    sorted_data = config_data.sort do | a, b |

      # Sorted hashes get converted to nested arrays of key/value pairs, so
      # in our case we get a file leaf/base name at index 0 and the data hash
      # at index 1 of each inner pair. Sort by group then base name.

      ( ( a[ 1 ][ 'group' ] || '' ) <=> ( b[ 1 ][ 'group' ] || '' ) ).nonzero? ||
      ( a[ 0 ] <=> b[ 0 ] )

    end

    # "YAML.dump" provides no sorting mechanism. Ruby 1.9 should help since
    # hash ordering is well defined, but for Ruby 1.8 with undefined order of
    # hash key enumeration, do it the hard way. Otherwise, the unsorted YAML
    # output is hard for humans to read or hand-modify.

    File.open( config_path, 'w' ) do | io |

      # Write the file header.

      io << "---\n"

      # Each individual YAML string is returned as an individual file and has
      # a file header line; output everything except this line.

      old_group = nil

      for pair in sorted_data
        base_name = pair[ 0 ]
        data = pair[ 1 ]
        group = data[ 'group' ] || ''

        # Emit a comment banner each time the (sorted) group changes.

        if ( old_group != group )
          io << "\n\# #{ ( group.empty? ) ? 'Ungrouped' : group }\n\n"
          old_group = group
        end

        # Serialise one entry, stripping the leading "---" document header
        # line which to_yaml adds to every document.

        io << ( { base_name => data } ).to_yaml.sub( /.*?\n/, '' )
      end
    end
  end

  puts "...Processing complete."

end

puts "Finished."