% Filefrag.m
%
% Plots block occupation of files (output of `filefrag` command)
%
% @param file             Path of a file containing the output of `filefrag -e -b32768 *.*`
% @param edgecolor        EdgeColor for #pcolor(...) (e.g.: 'none', 'black'); default: 'black'
% @param filenamePattern  Regexp capturing (in group #1) the part of the filename used for sorting;
%                         default: pattern for Canon IMG_nnnn.JPG / MVI_nnnn.MP4 files
% @param numerical        If true the captured part is treated as a numerical value
%                         (otherwise sorting is done alphabetically).
%                         Default: true with the default pattern, false with a custom pattern
function Filefrag (file, edgecolor, filenamePattern, numerical)
  % Default values
  if nargin < 2
    edgecolor = 'black';
  endif
  if nargin < 3
    % For Canon image/video files (incl. those changed by exiftool, which adds "_original")
    filenamePattern = '(?:[A-Za-z0-9_\-]+/)*(?:IMG|MVI)_(\d{4})\.(?:JPG|MP4)(?:\_original)?';
    numerical = true;
  elseif nargin < 4
    % A custom pattern may capture anything, so alphabetical sorting is the safe default
    numerical = false;
  endif

  % Read file (the handle is closed even if reading fails)
  fid = fopen(file, 'rb');
  if fid < 0
    error('Filefrag: cannot open file "%s"', file);
  endif
  unwind_protect
    content = textscan(fid, '%s', 'Delimiter', '');
  unwind_protect_cleanup
    fclose(fid);
  end_unwind_protect

  % All lines are enclosed in the only element of 'content'
  lines = content{1};

  % Create one big string. The trailing newline is required for the regexp below to match
  % the last record. It is appended by concatenation (not via sprintf) so that '%' or '\'
  % characters in the file content are never interpreted as format directives.
  str = [strjoin(lines, '\n'), char(10)];

  % Regexp capturing the
  %  - filename in #group1
  %  - all fragment lines in #group2
  % (built from named pieces; the concatenated pattern is one single regexp)
  pathPrefix    = '(?:[A-Za-z0-9_\-]+/)*';
  sizeLine      = ['File size of ', pathPrefix, '(\.?[A-Za-z0-9\-\_]+\.[A-Za-z0-9\-\_]+) is \d+ \(\d+ blocks of \d+ bytes\)\n'];
  columnLine    = '\s*ext: logical_offset: physical_offset: length: expected: flags:\n';
  extentLine    = '\s*\d+:\s+\d+\.\.\s+\d+:\s+\d+\.\.\s+\d+:\s+\d+:\s+(?:\d+:\s+)?merged(?:\,eof)?\n';
  discontinuity = 'Discontinuity: Block \d+ is at \d+ \(was \d+\)\n';
  summaryLine   = ['\.?', pathPrefix, '[A-Za-z0-9\-\_]+\.[A-Za-z0-9\-\_]+: \d+ extents? found\n'];
  pattern = [sizeLine, columnLine, '((?:', extentLine, '|', discontinuity, ')+)', summaryLine];

  tokens = regexp(str, pattern, 'tokens');
  if isempty(tokens)
    error('Filefrag: no filefrag records found in "%s"', file);
  endif

  % Get ordering according to the image/video number
  if numerical
    ordering = getNumericalOrdering(tokens, filenamePattern);
  else
    ordering = getAlphabeticalOrdering(tokens, filenamePattern);
  end

  % Parse the parts for each file
  % (iterate over 'tokens' in the 'ordering' found above)
  [filenames, matrix] = parseFileFragments(tokens, ordering);
  f = length(filenames);

  % First block, last block (over all files)
  a = min(matrix(:,1));
  b = max(matrix(:,2));

  % Create and fill output matrix
  out = expand(matrix, f, a, b);

  % DEBUG Show gaps
  % all = out(1,:); length(all)
  % idx = [1:length(all)];
  % gap = idx(all==0)

  % Display
  plot2(file, matrix, out, filenames, f, a, b, rainbow(f), edgecolor);
end

% INTERNAL =====================================================================

% Extracts the sort key (capture group #1 of 'pattern') from the filename of each token
%
% @param tokens   Tokens parsed from the filefrag output (each token: {filename, fragment lines})
% @param pattern  Regexp that captures the relevant part of the filename in group #1
% @return keys    1xN cell array of strings, keys{i} belongs to tokens{i}
function keys = extractSortKeys (tokens, pattern)
  keys = cell(1, numel(tokens));
  for i = 1:numel(tokens)
    filename = tokens{i}{1};
    nameTokens = regexp(filename, pattern, 'tokens');
    if isempty(nameTokens) || isempty(nameTokens{1})
      error('Filefrag: filename "%s" does not match the sorting pattern "%s"', filename, pattern);
    endif
    keys{i} = nameTokens{1}{1};
  endfor
end

% Get ordering according to (a part of the) filename (string)
%
% @param tokens     Tokens parsed from the filefrag output
% @param pattern    Regexp that captures the relevant part of the filename in group #1
% @return ordering  ordering(i) -> index of i-th token (row vector)
function ordering = getAlphabeticalOrdering (tokens, pattern)
  [~, ordering] = sort(extractSortKeys(tokens, pattern));
end

% Get ordering according to (a part of the) filename (number)
%
% @param tokens     Tokens parsed from the filefrag output
% @param pattern    Regexp that captures the relevant part of the filename in group #1
% @return ordering  ordering(i) -> index of i-th token (column vector)
function ordering = getNumericalOrdering (tokens, pattern)
  keys = extractSortKeys(tokens, pattern);
  [~, ordering] = sort(str2double(keys(:)));
end

% Parse the parts for each file
% (iterate over 'tokens' in the 'ordering' found
% above)
%
% @param tokens      Tokens parsed from the filefrag output (each token: {filename, fragment lines})
% @param ordering    ordering(i) -> index of the i-th token to process
%                    (see #getNumericalOrdering(...) / #getAlphabeticalOrdering(...))
% @return filenames  All filenames (first filename is a dummy "all" entry)
% @return matrix     For all fragments of all files - columns:
%                    1: first block,
%                    2: last block,
%                    3: length,
%                    4: file index + decimals indicating different fragments of the same file
function [filenames, matrix] = parseFileFragments (tokens, ordering)
  subPattern = '\s*\d+:\s+\d+\.\.\s+\d+:\s+(\d+)\.\.\s+(\d+):\s+(\d+):\s+(?:\d+:\s+)?merged(?:\,eof)?';

  count = numel(tokens);
  filenames = cell(1, count + 1);
  filenames{1} = 'all';
  occupations = cell(1, count);

  for f = 1:count
    match = tokens{1, ordering(f)};
    filename = match{1};
    segments = match{2};

    subTokens = regexp(segments, subPattern, 'tokens');
    if isempty(subTokens)
      error('parseFileFragments: no extents found for "%s"', filename);
    endif

    % All values of all rows in one vector
    values = str2double([subTokens{:}]);
    % Reshape to start a new row after each 3 values (start, end, length)
    fragments = reshape(values, 3, [])';

    % Add the file index + different decimals for each fragment
    n = size(fragments, 1); % Number of fragments
    fragments(:, 4) = f + (0:n-1)' / n;

    filenames{f + 1} = filename; % (+1 for the "all" entry)
    occupations{f} = fragments;
  endfor

  % All block start, end, length and file indices for all segments
  % If a file consists of N segments then N rows have the same (integer part of the)
  % file index in the fourth column
  matrix = cat(1, occupations{:});
end

% Create and fill output matrix
%
% @param matrix see #parseFileFragments(...)
% @param f number of files + 1
% @param a min block
% @param b max block
% @return out
%   Values:
%   - 0: not occupied,
%   - >=1: occupied by this filenumber
%   Rows:
%   - First row all files
%   - Row r+1 for file r
%   Columns:
%   - Block number (shifted so that block 'a' is column 1)
function out = expand (matrix, f, a, b)
  out = zeros(f, b - a + 1);
  for k = 1:size(matrix, 1)
    tag = matrix(k, 4);                               % file index + fragment decimals
    span = (matrix(k, 1):matrix(k, 2)) - a + 1;       % columns covered by this fragment
    out(1, span) = tag;                               % summary row (all files)
    out(floor(tag) + 1, span) = tag;                  % row of the individual file
  endfor
end

% PLOT -------------------------------------------------------------------------

% Draws the block occupation with #pcolor(...)
%
% @param file       filefrag output file (used as plot title)
% @param matrix     See #parseFileFragments(...)
% @param out        See #expand(...)
% @param filenames  Filenames (where the first is an extra "all" entry)
% @param f          Number of files + 1 (+1 for an extra row for "all")
% @param a          First block number
% @param b          Last block number (not used by this function)
% @param cmap       Color map (with at least f-1 colors)
% @param edgecolor  EdgeColor for #pcolor(...) (e.g.: 'none', 'black')
function plot2 (file, matrix, out, filenames, f, a, b, cmap, edgecolor)
  % Bring the expanded matrix into the shape #pcolor(...) expects
  [cells, xs, ys] = transform (out, f);
  surfaceHandle = pcolor(xs, ys, cells);
  set(surfaceHandle, 'EdgeColor', edgecolor);

  set(gca, 'TickDir', 'out', 'TickLength', [0, 0]);

  % One label per row, centered on the row
  % (underscores are escaped, otherwise they make the next character subscript)
  rowLabels = strrep(filenames, '_', '\_');
  set(gca, 'YTick', (1:1:f) + 0.5, 'YTickLabel', rowLabels);

  % Ticks at every fragment start and at the block following every fragment end
  fragmentStarts = matrix(:, 1) - a;
  fragmentEnds = matrix(:, 2) - a + 1;
  xTicksPos = [fragmentStarts; fragmentEnds];
  set(gca, 'XTick', xTicksPos + 1.5, 'XTickLabel', xTicksPos + a);

  % (underscores are escaped, otherwise they make the next character subscript)
  set(gca, 'Title', strrep(file, '_', '\_'));
  colormap(gca, cmap);
end

% Transforms the expanded matrix to be plotted with #pcolor(...)
% - Adds an empty row and an empty column
% - reduces identical neighboring columns to just one column
%
% @param out See #expand(...)
% @param f number of files + 1
% @return reduced  matrix for #pcolor(...) (zeros replaced by NaN)
% @return x        x parameter for #pcolor(...)
% @return y        y parameter for #pcolor(...)
% @return filter   per-column change measure used to select the kept columns
function [reduced, x, y, filter] = transform (out, f)
  % Largest absolute difference between each column and its left neighbor
  columnChange = max(abs(diff(out, 1, 2)), [], 1);
  % First column and the extra trailing column are always kept
  filter = [1, columnChange, 1];
  keep = filter > 0.00001;

  % For 'pcolor(...)' the matrix needs an extra (empty) row and column
  [rows, cols] = size(out);
  padded = [out, zeros(rows, 1); zeros(1, cols + 1)];

  reduced = padded(:, keep);
  reduced(reduced == 0) = NaN; % NaN -> no color

  positions = 1:length(filter); % All column positions
  x = positions(keep);          % Same selection as applied to 'reduced'
  y = 1:(f + 1);
end

% XXX ..........................................................................

% Old plot function using #imagesc(...) with "manually" created separator lines
function plot1 (file, matrix, out, filenames, f, a, b, cmap, edgecolor)
  % Plot (imagesc)
  imagesc(out);

  hold on;
  % Separator lines between the file rows (always black)
  rowEdges = (1:f)' + 0.5;
  plot([0; b - a], [rowEdges, rowEdges], 'color', [0, 0, 0]);
  % (underscores are escaped, otherwise they make the next character subscript)
  set(gca, 'YTick', 1:1:f, 'YTickLabel', strrep(filenames, '_', '\_'));

  % Separator lines at the fragment boundaries
  x = [matrix(:, 1) - a; matrix(:, 2) - a + 1];
  boundary = x + 0.5;
  plot([boundary, boundary], [0; f + 0.5], 'color', edgecolor);
  set(gca, 'XTick', x, 'XTickLabel', x + a);
  hold off;

  set(gca, 'TickDir', 'out', 'TickLength', [0, 0]);
  % (underscores are escaped, otherwise they make the next character subscript)
  set(gca, 'Title', strrep(file, '_', '\_'));

  % Configure colormap
  cmap(1, :) = [1, 1, 1]; % (background color for 0 values)
  colormap(gca, cmap);
end