diff/cmp for remote files

December 24, 2009

rdiff, rcmp – diff remote files

rdiff and rcmp extend the diff and cmp utilities to remote files.
Each file or directory argument is either a remote filename of the form [[user@]host1:]file, or a local filename.

Note: the scp program is used to retrieve remote files, so ssh authentication (~/.ssh/authorized_keys on the remote host) may need to be set up first. Search for ‘ssh authorized keys’ for details.
Note 2: rdiff can be hard-linked to rcmp (i.e. ln rdiff rcmp). The name the script is invoked under determines whether diff or cmp is run.

Example usages:
    $ rdiff -b produser@prodhost:/home/prod/bin/xyz.sh xyz.sh
    $ rcmp produser@prodhost:/home/prod/javalib/xyz.jar $HOME/dev/java/lib/xyz.jar
    $ rcmp ruser@rhost:/usr/local/abc:def /usr/local/abc:def

#! /bin/sh
#  Name: 
#  Synopsis:    rdiff [-b] [[user@]host1:]file1 [[user@]host2:]file2
#               rcmp [[user@]host1:]file1 [[user@]host2:]file2
#  Description: Run diff/cmp on remote files.  
#               Note: This script uses scp or rcp to copy remote files.
#                     (~/.ssh/authorized_keys, or ~/.rhosts may need to be configured)
#               Note2: Simple pattern matching is used to determine if a file is remote or local 
#                      The filename matches a "user@host:" or "host:" prefix pattern.  To force
#                      a match for a local file, specify the full path with the leading '/'.

# getfile SRC DEST
#   Make SRC available at the local path DEST.
#     - SRC beginning with '/' is always treated as a local file, even if it
#       contains ":/" further on.
#     - Otherwise, SRC containing ":/" (i.e. "[user@]host:/path") is treated
#       as remote and fetched with scp.
#     - Anything else is a local file.
#   Local files are hard-linked to DEST so that the caller can clean up both
#   temporary names the same way; if the link cannot be made (for example
#   when DEST is on a different filesystem) the file is copied instead.
#   Returns the exit status of the command that produced DEST.
getfile()
{
        case "$1" in
          /*)
                ;;      # absolute local path: handled below
          *:/*)
                scp "$1" "$2"
                # rcp "$1" "$2"
                return
                ;;
        esac

        ln "$1" "$2" 2>/dev/null || cp "$1" "$2"
}


# Main
# The name this script is invoked under selects the comparison program.
case $0 in
  *rdiff) cmd=diff;;
  *rcmp)  cmd=cmp;;
  *)      echo "$0: must be invoked as rdiff or rcmp" >&2
          exit 2;;
esac

usage="Usage: r${cmd} [-b] [[user@]host1:]file1 [[user@]host2:]file2"

options=        # do not inherit a value from the environment
while getopts b opt; do
        case $opt in
          b) options="${options} -b";;
          *) echo "$usage" >&2
             exit 2;;
        esac
done
shift `expr $OPTIND - 1`

if [ $# -ne 2 ]; then
        echo "$usage" >&2
        exit 2
fi

TMPDIR=$HOME/.tmp
mkdir -p "$TMPDIR" || exit 2
tmpfile1=$TMPDIR/r${cmd}1.$$
tmpfile2=$TMPDIR/r${cmd}2.$$
# Remove the temporary files on any exit; turn signals into a normal exit so
# that the cleanup above runs for them too.
trap 'rm -f "$tmpfile1" "$tmpfile2"' 0
trap 'exit 2' 1 2 15

if getfile "$1" "$tmpfile1" && getfile "$2" "$tmpfile2"; then
        # $options is intentionally unquoted: it holds zero or more flags.
        $cmd $options "$tmpfile1" "$tmpfile2"
else
        # A file could not be fetched; report trouble rather than "identical".
        exit 2
fi

clone.c

December 12, 2009

Here’s a fun, short program.
The primary goal is to write a program that outputs the source code of the program when it is executed (ie: clone itself). A secondary goal is to make the program as small as possible.

Here’s a sh/ksh/csh/bash implementation:
cat $0

Here’s a version written in C:
char *s="char *s=%c%s%c; main() { printf(s,34,s,34); putchar(0x0a); }"; main() { printf(s,34,s,34); putchar(0x0a); }

getopts.java is a simple command-line option parser for Java. It is modeled on the Unix shell utility of the same name.

Example usage: See main() below.

/* Name:        $Id$
 * Description: getopts is a simple command line parser based on the getopts shell parser.
 */

package HHjlib;

public class getopts
{
        /** Value returned by getOption() once there are no more options to parse. */
        private static final char END_OF_OPTIONS = '\0';

        private final String[] argv;
        private final int argc;
        /** Index of the argv element currently being parsed. */
        private int optind = 0;
        /** Index of the next option character inside argv[optind]; index 0 is the leading '-'. */
        private int optind2 = 1;
        /** Argument of the last option, or the offending option character after an error. */
        private String optarg;

        /** Initialize the getopts parser with the command line argument array */
        public getopts(String args[])
        {
                argv = args;
                argc = args.length;
        }


        /**
         * Parses the next option from the command line.
         *
         * <p>The optstring parameter contains the option characters to be recognized; if a
         * character is followed by a colon, the option is required to have an argument, which
         * may be attached ("-dvalue") or given as the following element ("-d value") and is
         * retrieved with getOptionArg(). Options without arguments may be clustered ("-abc").
         * The colon and question mark characters may not be used as option characters.
         *
         * <p>Parsing stops at the first element that is not an option: an element that does not
         * start with '-', an empty element, or a lone "-". The element "--" also ends the
         * options and is itself skipped. Callers should stop calling this method once it has
         * returned the null character.
         *
         * @param optstring the recognized option characters, each optionally followed by ':'
         * @return the option character; '?' for an unrecognized option and ':' for a missing
         *         option argument (getOptionArg() then returns the offending option character);
         *         or the null character when the end of the options is reached
         */
        public char getOption(String optstring)
        {
                if (optind >= argc) {
                        return END_OF_OPTIONS;
                }

                String arg = argv[optind];
                // "", "-" and anything not starting with '-' are operands, not options.
                if (arg.length() < 2 || arg.charAt(0) != '-') {
                        return END_OF_OPTIONS;
                }
                if (arg.equals("--")) {
                        nextArgument();
                        return END_OF_OPTIONS;
                }

                char option = arg.charAt(optind2);
                int pos = (option == ':' || option == '?') ? -1 : optstring.indexOf(option);
                if (pos < 0) {
                        optarg = Character.toString(option);
                        // Step past the bad character so that a caller who keeps
                        // parsing after an error does not see it again forever.
                        nextOptionChar(arg);
                        return '?';
                }

                boolean takesArgument =
                        pos + 1 < optstring.length() && optstring.charAt(pos + 1) == ':';
                if (!takesArgument) {
                        nextOptionChar(arg);
                        return option;
                }

                if (optind2 + 1 < arg.length()) {
                        // Argument attached to the option: -dvalue
                        optarg = arg.substring(optind2 + 1);
                        nextArgument();
                } else if (optind + 1 < argc) {
                        // Argument is the following element: -d value
                        optarg = argv[optind + 1];
                        optind += 2;
                        optind2 = 1;
                } else {
                        optarg = Character.toString(option);
                        nextArgument();
                        return ':';
                }
                return option;
        }

        /** Moves to the next character of a clustered option, or to the next element. */
        private void nextOptionChar(String arg)
        {
                if (optind2 + 1 < arg.length()) {
                        optind2++;
                } else {
                        nextArgument();
                }
        }

        /** Moves to the first option character of the next argv element. */
        private void nextArgument()
        {
                optind++;
                optind2 = 1;
        }

        /** Return a cmdline option argument or the option character if command line parsing failed */
        public String getOptionArg()
        {
                return optarg;
        }

        /** Return the index of the next cmdline array element to be processed */
        public int getOptionIndex()
        {
                return optind;
        }

        /** Example usage: parses the options "abcd:e:" and then prints the remaining arguments. */
        public static void main(String args[])
        {
                getopts cmdline = new getopts(args);
                char option;
                while ((option = cmdline.getOption("abcd:e:")) != '\0') {
                        switch(option) {
                        case 'a':
                        case 'b':
                        case 'c':
                                System.out.println("Option='" + option + "'");
                                break;
                        case 'd':
                        case 'e':
                                System.out.println("Option='" + option + "', Argument='" + cmdline.getOptionArg() + "'");
                                break;
                        case '?':
                                System.err.println("Error: Invalid option '-" + cmdline.getOptionArg() + "'");
                                System.exit(1);
                        case ':':
                                System.err.println("Error: Missing option argument for '-" + cmdline.getOptionArg() + "'");
                                System.exit(1);
                        }
                }
                for (int indx = cmdline.getOptionIndex(); indx < args.length; ++indx) {
                        System.out.println(args[indx]);
                }
        }
}

/*
:!javac %
:!java `basename % .java` -a abc
:!java `basename % .java` -a -b a b c
:!java `basename % .java` -ab -c a b c
:!java `basename % .java` -a -bc a b c
:!java `basename % .java` -abc arg1 arg2
:!java `basename % .java` -ddarg arg1; # error: missing option argument
:!java `basename % .java` -d darg arg1; # error: missing option argument
:!java `basename % .java` -ddarg arg1 -e earg; # error: missing option argument
:!java `basename % .java` -q; # Invalid option
:!java `basename % .java` -d; # error: missing option argument
*/

Here’s a very simple utility program, printargs, that I use to debug how the shell interpreter parses quoted arguments in shell scripts. Shell quoting can be one of the trickiest things to get right in a script. White-space characters in filenames and directory names, string concatenation, and nested single, double and back-tick quotes are only a few of the complications!

Note: To compile the code    $ cc printargs.c -o printargs

Here’s an example of debugging using printargs:
  $ cp $file $target

This works fine as long as neither the $file nor the $target name contains any white-space characters. But if they do, the command will fail. For example: turn on tracing with 'set -x' and set file='Star Trek IV.mp4'. The trace will display '+ cp Star Trek IV.mp4 destdir', which looks right but is actually wrong. Let’s prefix the cp command with printargs to see why.

$ file='Star Trek IV.mp4'; target=destdir
$ printargs cp $file $target
+ printargs cp Star Trek IV.mp4 destdir
1 : 'cp'
2 : 'Star'
3 : 'Trek'
4 : 'IV.mp4'
5 : 'destdir'

The copy command is actually trying to copy three files {Star, Trek, IV.mp4} to the dest directory. It should be just one file ('Star Trek IV.mp4'). The fix is to quote both $file and $target, i.e. cp "$file" "$target".

$ file='Star Trek IV.mp4'; target=destdir
$ printargs cp "$file" "$target"
1 : 'cp'
2 : 'Star Trek IV.mp4'
3 : 'destdir'

After debugging, remove the printargs prefix from the command and the script should be good to go.

/* Name:        %I%
 * Synopsis:    printargs -c -i -q commandline
 * Description: printargs displays cmdline arguments.  This is useful for debugging 
 *              shell scripts that escape the space, backslash, quote chars ('"`)
 *              and other wildcard characters (eg: "[*?]").
 *                Options: -c  Echo the command line
 *                         -i  Do not display argument indices
 *                         -q  Do not display single quotes surrounding each argument
 */

#include 
#include 

main(int argc, char *argv[])
{
        int opt;
        extern int optind;
        int echo_cmdline = 0; 
        int display_indices = 1; 
        int indx;
        char *quotes = "'";
        char *spaces = "";

        while ((opt = getopt(argc, argv, "ciq")) != EOF) {
                switch(opt) {
                case 'c': 
                        echo_cmdline = 1;
                        break;
                case 'i':
                        display_indices = 0;
                        break;
                case 'q':
                        quotes = "";
                        break;
                default:
                        fprintf(stderr, "%s: Invalid option '%c'\n", opt);
                        fprintf(stderr, "Syntax: %s [-ciq] command command_options command_args ..."
);
                        exit(1);
                }
        }

        --argc;
        if (echo_cmdline == 1) {
                printf("CmdLine: ");
                for (indx = optind; indx <= argc; ++indx)
                        printf("%s%c", argv[indx], indx < argc ? ' ' : '\n');
        }

        for (indx = optind; indx <= argc; ++indx) {
                if (display_indices == 1)
                        printf("%d : ", indx - optind + 1);
                printf("%s%s%s\n", quotes, argv[indx], quotes);
        }

        exit(0);
}

Generate an index (or tag) file of AVS functions to allow these items to be easily located by a text editor (vi).

/*  Synopsis:     avstags [ avs_file ...]
 *  Description:  Create vi Style tags for OpenLink Endur/Findur AVS files
 */

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "emsg.h"       /* presumably supplies debug_printf()/debug2_printf() - confirm */
static void reset_input_line();
%}
%option noyywrap

  /* Name of the file being scanned; written into every tag entry. */
  static char *avs_filename = "";
  /* Current brace nesting depth, and non-zero while inside a block comment. */
  static int braces = 0, comment = 0;
  /* Most recent identifier seen, and the text of the current input line. */
  static char identifer[32 + 1], input_line[2048];
  /* line_indx is the next free position in input_line. */
  static int id_indx = 0, line_indx = 0;
  /* Scratch characters used by the rules that read ahead with input(). */
  static int ch, prev_ch;
  /* Candidate tag name and the source line it was found on. */
  static char tag[sizeof(identifer)], tag_line[sizeof(input_line)];
  /* tagline_flag: the current line holds a candidate definition;
   * tag_flag: the candidate was confirmed and must be printed. */
  static int tagline_indx = 0, tag_flag = 0, tagline_flag = 0;

%%

%{
/* Append one character to input_line.  Characters that no longer fit are
 * dropped so that an over-long line cannot overflow the buffer; the last
 * byte is never written, which keeps the zero-filled buffer NUL-terminated. */
#define APPEND_CH(c) \
        do { \
                if (line_indx < (int) sizeof(input_line) - 1) \
                        input_line[line_indx++] = (char) (c); \
        } while (0)
/* Append the text of the current token (yytext) to input_line. */
#define APPEND_TEXT() \
        do { \
                int i_; \
                for (i_ = 0; i_ < (int) yyleng; ++i_) \
                        APPEND_CH(yytext[i_]); \
        } while (0)
%}
\/\*    { comment = 1; }
\*\/    { comment = 0; }
\/\/    { APPEND_TEXT();
          if (comment == 0) {
             /* Line comment: copy the rest of the line and finish the line.
              * Inside a block comment the slashes are ordinary text, so a
              * closing marker later on the same line is still recognised.
              * input() reports end of file as EOF or as 0 depending on the
              * flex release, so both values end the loop. */
             while ((ch = input()) != EOF && ch != 0 && ch != '\n')
                APPEND_CH(ch);
             reset_input_line();
          }
        }
\{      { if (comment == 0) ++braces;  APPEND_CH(yytext[0]); }
\}      { if (comment == 0) --braces;  APPEND_CH(yytext[0]); }
\"      { if (comment == 0) {
             /* String literal: copy everything up to, but not including,
              * the closing quote.  A backslash escapes the next character,
              * so an escaped backslash does not hide the closing quote. */
             int escaped = 0;
             APPEND_CH(yytext[0]);
             while ((ch = input()) != EOF && ch != 0) {
                if (!escaped && ch == '"')
                   break;
                escaped = (!escaped && ch == '\\');
                APPEND_CH(ch);
             }
          }
        }
\(      { if (comment == 0 && braces == 0) {
             /* An identifier followed by '(' at the top level is a
              * candidate function definition. */
             tagline_flag = 1;
             strcpy(tag, identifer);  
          }
          APPEND_CH(yytext[0]);
          debug2_printf(("Matched '(' tag='%s', braces=%d, comment=%d\n", tag, braces, comment));
        }
\)      { APPEND_CH(yytext[0]);
          debug2_printf(("Matched ')' tag='%s', braces=%d, comment=%d\n", tag, braces, comment));
          /* Read ahead at the top level: '{' confirms the tag, ';' rejects it. */
          while (comment == 0 && braces == 0 && (ch = input()) != EOF && ch != 0) {
             APPEND_CH(ch);
             if (ch == '{') {
                tag_flag = 1;
                ++braces;
                break;
             } else if (ch == ';') {
                tag_flag = 0;
                break;
             } else if (ch == '\n') {
                reset_input_line();
             }
          }
        }
[a-zA-Z_][a-zA-Z0-9_]*   {
          strncpy(identifer, yytext, sizeof(identifer) - 1);
          identifer[sizeof(identifer) - 1] = '\0';
          APPEND_TEXT();
          debug2_printf(("ident='%s'\n", identifer));
        }
.       { APPEND_CH(yytext[0]); }
\n      { reset_input_line(); }
%%


/*
 * Finish the current input line: remember it as the tag line when it held a
 * candidate definition, print the tag entry when the candidate was confirmed,
 * then clear both flags and empty the line buffer.
 */
static void
reset_input_line()
{
        debug_printf(("reset_input_line() tagline_flag=%d, tag_flag=%d, tag='%s', tag_line='%s'\n", tagline_flag, tag_flag, tag, tag_line));

        if (tagline_flag != 0) {
                strcpy(tag_line, input_line);
        }

        if (tag_flag != 0) {
                fprintf(stdout, "%s\t%s\t/^%s$\n", tag, avs_filename, tag_line);
        }

        tag_flag = 0;
        tagline_flag = 0;

        line_indx = 0;
        memset(input_line, 0, sizeof input_line);
}


static void
avstags(char *filename, FILE *fp)
{
        avs_filename = filename;
        yyin = fp;
        yylex();
}


main(int argc, char *argv[])
{
        int indx;
        FILE *fp;

        if (argc == 1) {
                avstags("", stdin);
        } else {
                for (indx = 1; indx  tags
*/

SQL pretty printer

November 9, 2009

A very simple SQL pretty printer.

Example usage:
  $ ppsql query1.sql
  $ cvs co -p query.sql | ppsql

Compilation instructions:
  1 $ lex ppsql.l
  2 $ cc -g lex.yy.c -o ppsql

static char rev[] = "#(@) $Id: ppsql.l,v 1.8 2005/04/28 13:48:55 howard Exp $";
/* Synopsis:     ppsql [file_containing_SQL_statement ...]
 * Description:  A simple SQL Pretty Printer
 */

%{
#include <stdio.h>
#include <string.h>

enum {
        TOKEN_SELECT = 1,
        TOKEN_IDENTIFIER,
        TOKEN_COMMA,
        TOKEN_FROM,
        TOKEN_WHERE,
        TOKEN_AND,
        TOKEN_OR,
        TOKEN_OPAREN,
        TOKEN_CPAREN,
        TOKEN_STRING,
        TOKEN_INTEGER,
        TOKEN_DOUBLE,
        TOKEN_OPERATOR,
        N_TOKENS
};
%}
%option noyywrap

%%

select                  { return TOKEN_SELECT; }
","                     { return TOKEN_COMMA; }
\"[^\"\n]*\"            { /* Stop at the first closing quote so that two
                           * literals on one line are two separate tokens. */
                          return TOKEN_STRING; }
\'[^\'\n]*\'            { return TOKEN_STRING; }
[0-9]+                  { return TOKEN_INTEGER; }
[0-9]+"."[0-9]*         { return TOKEN_DOUBLE; }
from                    { return TOKEN_FROM; }
where                   { return TOKEN_WHERE; }
and                     { return TOKEN_AND; }
or                      { return TOKEN_OR; }
"("                     { return TOKEN_OPAREN; }
")"                     { return TOKEN_CPAREN; }
[a-zA-Z][a-zA-Z0-9_.]*  { return TOKEN_IDENTIFIER; }
"<>"|"!="|"<="|">="|"+"|"-"|"*"|"/"|"="|">"|"<"    {
                        /* The longest match wins, so "<=" is one operator
                         * rather than "<" followed by "=". */
                        return TOKEN_OPERATOR; }

[ \t\r\n]+              /* Eat Whitespace */

.                       printf("?%c?", yytext[0] );   /* Hmmm, what's this? */

%%
typedef enum {
        BEGIN_STATE,
        PARSE_SELECT,
        PARSE_FROM,
        PARSE_WHERE,
        END_STATE,
} parse_state;

/*
 * Fixed-capacity stack of ints.  The structure is allocated with room for
 * `size` elements in its trailing array; see new_int_stack().
 */
typedef struct int_stack_s {
        int size;      /* capacity: number of elements that fit */
        int top;       /* number of elements currently stored */
        int stack[1];  /*  Actually int stack[size];   ref: new_int_stack()  */
} int_stack_t;


/*
 * Allocate an empty stack able to hold `size` ints.
 * Returns the new stack, or NULL when memory cannot be allocated.  The caller
 * releases it with free().
 */
int_stack_t *
new_int_stack(int size)
{
        int_stack_t *stack;
        /* One element is already part of the structure; never let a size
         * below 1 shrink the allocation under the size of the structure. */
        size_t extra = (size > 1) ? (size_t) (size - 1) : 0;

        stack = malloc(sizeof(int_stack_t) + extra * sizeof(int));
        if (stack != NULL) {
                stack->size = size;
                stack->top = 0;
        }
        return stack;
}


/*
 * Push `val` onto the stack.
 * Returns the number of elements now stored, or -1 when the stack is full.
 */
static int
push(int_stack_t *stack, int val)
{
        if (stack->top < stack->size) {
                stack->stack[stack->top++] = val;
                return stack->top;
        }
        return -1;
}


/*
 * Pop the top element into *val.
 * Returns the number of elements left, or -1 (leaving *val untouched) when
 * the stack is empty.
 */
static int
pop(int_stack_t *stack, int *val) 
{
        if (stack->top > 0) {
                *val = stack->stack[--stack->top];
                return stack->top;
        }
        return -1;
}


/*
 * Pretty-print the SQL read from fp_in on stdout.
 *
 * "from", "where", "and" and "or" start a new line indented to the column of
 * the enclosing "select" or "("; the indentation of each parenthesis level is
 * kept on a stack so that ")" restores the previous level.
 *
 * Returns 0 on success, or 1 when the indentation stack cannot be allocated.
 */
int
ppsql(FILE *fp_in)
{
        int_stack_t *indent_stack = new_int_stack(255);
        parse_state state;
        int token_type;
        int space_flag = 0;     /* non-zero when the next word needs a leading space */
        int indent = 0, col = 0;

        if (indent_stack == NULL) {
                fputs("ppsql: out of memory\n", stderr);
                return 1;
        }

        yyin = fp_in;
        state = BEGIN_STATE;

        while ((token_type = yylex()) != 0) {
                switch(token_type) {
                case TOKEN_SELECT:
                        fputs("select ", stdout);
                        indent = col;
                        col += 7;
                        space_flag = 0;
                        state = PARSE_SELECT;
                        break;
                case TOKEN_FROM:
                case TOKEN_WHERE:
                case TOKEN_AND:
                case TOKEN_OR:
                        fputc('\n', stdout);
                        for (col = 0; col < indent; ++col) { fputc(' ', stdout); }
                        if (col == 0) {
                                /* At the outermost level, right-align "and"/"or"
                                 * under "where". */
                                if (token_type == TOKEN_AND) {
                                        fputs("  ", stdout);
                                        col += 2;
                                } else if (token_type == TOKEN_OR) {
                                        fputs("   ", stdout);
                                        col += 3;
                                }
                        }
                        fputs(yytext, stdout);
                        fputc(' ', stdout);
                        col += (strlen(yytext) + 1);
                        space_flag = 0;
                        break;
                case TOKEN_OPAREN:
                        if (space_flag) { fputc(' ', stdout); }
                        push(indent_stack, indent);
                        fputs("( ", stdout);
                        col += (2 + space_flag);
                        indent = col;
                        space_flag = 0;
                        break;
                case TOKEN_CPAREN:
                        fputs(" )", stdout);
                        col += 2;
                        /* An unmatched ")" leaves the indentation unchanged. */
                        pop(indent_stack, &indent); 
                        space_flag = 1;
                        break;
                case TOKEN_COMMA:
                        /*fputs(((state == PARSE_SELECT) ? ",\n" : ", "), stdout);*/
                        fputs(", ", stdout);
                        col += 2;
                        space_flag = 0;
                        break;
                case TOKEN_OPERATOR:
                        fputc(' ', stdout);
                        fputs(yytext, stdout);
                        fputc(' ', stdout);
                        col += (strlen(yytext) + 2);
                        space_flag = 0;
                        break;
                default:
                        if (space_flag) { fputc(' ', stdout); }
                        fputs(yytext, stdout);
                        col += (strlen(yytext) + space_flag);
                        space_flag = 1;
                        break;
                }
        }
        printf("\n");
        free(indent_stack);
        return 0;
}


/*
 * Pretty-print each file named on the command line, or stdin when no file is
 * given.  With more than one file, each is preceded by a "=== name ===" line.
 *
 * Returns 0 on success.  A file that cannot be opened is reported on stderr
 * and makes the exit status 1; processing stops at the first file for which
 * ppsql() fails, and that status is returned.
 */
int
main(int argc, char **argv)
{
        int retcode = 0;
        int indx;

        if (argc == 1) {
                return ppsql(stdin);
        }

        for (indx = 1; indx < argc; ++indx) {
                int rc;
                FILE *fp = fopen(argv[indx], "r");

                if (fp == NULL) {
                        perror(argv[indx]);
                        retcode = 1;
                        continue;
                }
                if (argc > 2) { printf("=== %s ===\n", argv[indx]); }
                rc = ppsql(fp);
                fclose(fp);
                if (rc != 0) {
                        retcode = rc;
                        break;
                }
        }

        return retcode;
}


/*
:!lex % && cc -g lex.yy.c && rm -f lex.yy.c
:!echo 'select col1, string, (a+b)/c, literal from table , table2 where x = y and a = b or  c in (select distinct abc from def where alphabet = 1) and 1 =  1' | a.out

Sample SQL:
select distinct tran_schedule_delivery.  delivery_id, 455854
from tran_schedule_delivery, ab_tran, schedule_delivery_detail
where ( tran_schedule_delivery.delivery_id = schedule_delivery_detail.delivery_id
        and schedule_delivery_detail.deal_tracking_num = ab_tran.deal_tracking_num )
  and ( tran_schedule_delivery.volume_type <> 0
        and ( ( ab_tran.ins_type in ( select id_number
                                      from instruments
                                      where id_number in ( 45001, 45002, 45006, 45149, 45160 )
                                         or base_ins_id in ( 45001, 45002, 45006, 45149, 45160 ) )
                and ab_tran.buy_sell = 0 )
           or ( ab_tran.ins_type in ( select id_number
                                      from instruments
                                      where id_number in ( 45003, 45008, 45137, 45146 )
                                         or base_ins_id in ( 45003, 45008, 45137, 45146 ) ) )
           or ( ab_tran.ins_type in ( select id_number
                                      from instruments
                                      where id_number in ( 45001, 45002, 45006, 45149, 45160 )
                                         or base_ins_id in ( 45001, 45002, 45006, 45149, 45160 ) )
                and ab_tran.buy_sell = 1 ) )
        and tran_schedule_delivery.gmt_start_date_time  &tt; '01-jan-2003 00:00:00'
        and tran_schedule_delivery.volume_type in ( 4 , 6 ) )
  or not exists ( select *
                  from query_result qr2
                  where qr2.unique_id = 455854
                    and qr2.query_result = tran_schedule_delivery.delivery_id )
*/

hexdump.c utility

November 9, 2009

Dump files out in hex format.
Useful for displaying binary files and debugging weird input/output.

Example usages:
  $ hd unicode.txt
  $ cat unicode.txt | hd

/* Name:         $Id: hd.c,v 1.1 2003/05/02 23:56:11 howard Exp $
 * Synopsis:     hd file ...
 * Description:  Dump files in hex/ascii format.
 */

#include <stdio.h>
#include <ctype.h>

#define SEP ' '
#define SEP2 ' '
#define SEP3 "  "


/*
 * Write the first `len` values of `buf` to stdout as text, substituting '.'
 * for every value that is not a printable character.
 */
static void
chardump(int *buf, int len)
{
        int pos = 0;

        while (pos < len) {
                int c = buf[pos++];
                putchar(isprint(c) ? c : '.');
        }
}


/*
 * Convert a value in the range 0..15 to its upper-case hexadecimal digit.
 * Returns the digit character, or '?' for a value outside that range.
 */
static int
nibble2hex(int nibble)
{
        static const char digits[] = "0123456789ABCDEF";

        if (nibble < 0 || nibble > 15)
                return '?';
        return digits[nibble];
}
/*
 * Write the low byte of `ch` to stdout as two hexadecimal digits, high
 * nibble first, followed by the SEP separator.
 */
static void
hexdump(int ch)
{
        int high_digit = nibble2hex((ch >> 4) & 0xF);
        int low_digit = nibble2hex(ch & 0xF);

        putchar(high_digit);
        putchar(low_digit);
        putchar(SEP);
}


/*
 * Dump the stream `fp` to stdout, 16 bytes per line: the bytes in hex with an
 * extra SEP2 between the 8th and 9th byte, then SEP3 and the same bytes as
 * text.  A short last line is padded so that its text column lines up with
 * the full lines above it.
 *
 * Always returns 0.
 */
static int
dump(FILE *fp)
{
        int buf[16] = { 0 };
        int count = 0;          /* bytes collected for the current line */
        int ch;

        while ((ch = fgetc(fp)) != EOF) {
                /* The mid-line separator is written just before the 9th byte,
                 * never after the 8th, so a line that ends after exactly 8
                 * bytes gets it once from the padding loop below. */
                if (count == 8)
                        putchar(SEP2);
                buf[count++] = ch;
                hexdump(ch);
                if (count == 16) {
                        fputs(SEP3, stdout);
                        chardump(buf, count);
                        putchar('\n');
                        count = 0;
                }
        }
        if (count > 0) {
                int pad;
                for (pad = count; pad < 16; ++pad) {
                        if (pad == 8) { putchar(SEP2); }
                        putchar(' '); putchar(' '); putchar(SEP);
                }
                fputs(SEP3, stdout);
                chardump(buf, count);
                putchar('\n');
        }
        return 0;
}


/*
 * Dump each file named on the command line, or stdin when no file is given.
 * A file that cannot be opened is reported on stderr and skipped.
 *
 * Returns 0 when every input was dumped, 1 when a file could not be opened.
 */
int
main(int argc, char *argv[])
{
        int indx;
        int status = 0;
        FILE *fp;

        if (argc == 1) {
                dump(stdin);
                return 0;
        }

        for (indx = 1; indx < argc; ++indx) {
                fp = fopen(argv[indx], "rb");
                if (fp == NULL) {
                        perror(argv[indx]);
                        status = 1;
                        continue;
                }
                dump(fp);
                fclose(fp);
        }
        return status;
}
/*
:!cc % -o hd
:!gcc -g % -o hd
*/

Follow

Get every new post delivered to your Inbox.