clone.c

December 12, 2009

Here’s a fun, short program.
The primary goal is to write a program that outputs the source code of the program when it is executed (ie: clone itself). A secondary goal is to make the program as small as possible.

Here’s a sh/ksh/csh/bash implementation:
cat $0

Here’s a version written in C:
char *s="char *s=%c%s%c; main() { printf(s,34,s,34); putchar(0x0a); }"; main() { printf(s,34,s,34); putchar(0x0a); }

Advertisements

Here’s a very simple utility program that I use to debug how the shell interpreter parses quoted arguments in shell scripts. Shell quoted strings can be one of the trickiest things to get right in a script. White-space characters in filenames and directory names, string concatenation, nested single, double, back-tick quotes are only a few of the complications!

Note: To compile the code    $ cc printargs.c -o printargs

Here’s an example of debugging using printargs:
  $ cp $file $target

This works fine as long as the $file or $target name does not contain any white space characters. But if they do, the command will fail. For example: turn on trace ‘set -x’ and set $file=’Star Trek IV.mp4′. The trace will display ‘+ cp Star Trek IV.mp4 destdir’ which looks right, but actually is wrong. Let’s prefix the cp command with printargs to see why.

$ file=’Star Trek IV.mp4′; target=destdir
$ printargs cp $file $target
+ printargs cp Star Trek IV.mp4 destdir
1 : ‘cp’
2 : ‘Star’
3 : ‘Trek’
4 : ‘IV.mp4’
5 : ‘destdir’

The copy command is actually trying to copy three files {Star, Trek, IV.mp4} to the dest directory. It should be just one file (‘Star Trek IV.mp4’). The fix is to quote both $file and $target ie: cp “$file” “$target”.

$ file=’Star Trek IV.mp4′; target=destdir
$ printargs cp “$file” “$target”
1 : ‘cp’
2 : ‘Star Trek IV.mp4’
3 : ‘destdir’

After debugging, remove the printargs prefix from the command and the script should be good to go.

/* Name:        %I%
 * Synopsis:    printargs -c -i -q commandline
 * Description: printargs displays cmdline arguments.  This is useful for debugging 
 *              shell scripts that escape the space, backslash, quote chars ('"`)
 *              and other wildcard characters (eg: "[*?]").
 *                Options: -c  Echo the command line
 *                         -i  Do not display argument indices
 *                         -q  Do not display single quotes surrounding each argument
 */

#include 
#include 

main(int argc, char *argv[])
{
        int opt;
        extern int optind;
        int echo_cmdline = 0; 
        int display_indices = 1; 
        int indx;
        char *quotes = "'";
        char *spaces = "";

        while ((opt = getopt(argc, argv, "ciq")) != EOF) {
                switch(opt) {
                case 'c': 
                        echo_cmdline = 1;
                        break;
                case 'i':
                        display_indices = 0;
                        break;
                case 'q':
                        quotes = "";
                        break;
                default:
                        fprintf(stderr, "%s: Invalid option '%c'\n", opt);
                        fprintf(stderr, "Syntax: %s [-ciq] command command_options command_args ..."
);
                        exit(1);
                }
        }

        --argc;
        if (echo_cmdline == 1) {
                printf("CmdLine: ");
                for (indx = optind; indx <= argc; ++indx)
                        printf("%s%c", argv[indx], indx < argc ? ' ' : '\n');
        }

        for (indx = optind; indx <= argc; ++indx) {
                if (display_indices == 1)
                        printf("%d : ", indx - optind + 1);
                printf("%s%s%s\n", quotes, argv[indx], quotes);
        }

        exit(0);
}

Generate an index (or tag) file of AVS functions to allow these items to be easily located by a text editor (vi).

/*  Synopsis:     avstags [ avs_file ...]
 *  Description:  Create vi Style tags for OpenLink Endur/Findur AVS files
 */

%{
#include 
#include 
#include 
#include "emsg.h"
static void reset_input_line();
%}
%option noyywrap

  static char *avs_filename = "";
  static int braces = 0, comment = 0;
  static char identifer[32 + 1], input_line[2048];
  static int id_indx = 0, line_indx = 0;
  static int ch, prev_ch;
  static char tag[sizeof(identifer)], tag_line[sizeof(input_line)];
  static int tagline_indx = 0, tag_flag = 0, tagline_flag = 0;

%%

\/\*    { comment = 1; }
\*\/    { comment = 0; }
\/\/    { strcat(input_line, yytext); line_indx+= yyleng;
          while ((ch = input()) != '\n')
             input_line[line_indx++] = ch;
          reset_input_line();
        }
\{      { if (comment == 0) ++braces;  input_line[line_indx++] = yytext[0]; }
\}      { if (comment == 0) --braces;  input_line[line_indx++] = yytext[0]; }
\"      { if (comment == 0) {
             ch = yytext[0];
             do {
                input_line[line_indx++] = ch;
                prev_ch = ch;
             } while (!(((ch = input()) == '"') && prev_ch != '\\'));
          }
        }
\(      { if (comment == 0 && braces == 0) {
             tagline_flag = 1;
             strcpy(tag, identifer);  
          }
          input_line[line_indx++] = yytext[0];
          debug2_printf(("Matched '(' tag='%s', braces=%d, comment=%d\n", tag, braces, comment));
        }
\)      { input_line[line_indx++] = yytext[0];
          debug2_printf(("Matched ')' tag='%s', braces=%d, comment=%d\n", tag, braces, comment));
          while (comment == 0 && braces == 0 && ((ch = input()) != EOF)) {
             input_line[line_indx++] = ch;
             if (ch == '{') {
                tag_flag = 1;
                ++braces;
                break;
             } else if (ch == ';') {
                tag_flag = 0;
                break;
             } else if (ch == '\n') {
                reset_input_line();
             }
          }
        }
[a-zA-Z_][a-zA-Z0-9_]*   {
          strncpy(identifer, yytext, sizeof(identifer) - 1);
          strcat(input_line, yytext);
          line_indx += yyleng;
          debug2_printf(("ident='%s'\n", identifer));
        }
.       { input_line[line_indx++] = yytext[0]; }
\n      { reset_input_line(); }
%%


static void
reset_input_line()
{
        debug_printf(("reset_input_line() tagline_flag=%d, tag_flag=%d, tag='%s', tag_line='%s'\n", tagline_flag, tag_flag, tag, tag_line));
        if (tagline_flag)
                strcpy(tag_line, input_line);  
        if (tag_flag) {
                printf("%s\t%s\t/^%s$\n", tag, avs_filename, tag_line);
        }
        tagline_flag = tag_flag = 0;
        memset(input_line, 0, sizeof(input_line)); 
        line_indx = 0;
}


static void
avstags(char *filename, FILE *fp)
{
        avs_filename = filename;
        yyin = fp;
        yylex();
}


main(int argc, char *argv[])
{
        int indx;
        FILE *fp;

        if (argc == 1) {
                avstags("", stdin);
        } else {
                for (indx = 1; indx  tags
*/

SQL pretty printer

November 9, 2009

A very simple SQL pretty printer.

Example usage:
  $ ppsql query1.sql
  $ cvs co -p query.sql | ppsql

Compilation instructions:
  1 $ lex ppsql.l
  2 $ cc -g lex.yy.c -o ppsql

static char rev[] = "#(@) $Id: ppsql.l,v 1.8 2005/04/28 13:48:55 howard Exp $";
/* Synsopsis     ppsql [file_containing_SQL_statement ...]
 * Description:  A simple SQL Pretty Printer
 */

%{
#include <stdio.h>
#include <string.h>

enum {
        TOKEN_SELECT = 1,
        TOKEN_IDENTIFIER,
        TOKEN_COMMA,
        TOKEN_FROM,
        TOKEN_WHERE,
        TOKEN_AND,
        TOKEN_OR,
        TOKEN_OPAREN,
        TOKEN_CPAREN,
        TOKEN_STRING,
        TOKEN_INTEGER,
        TOKEN_DOUBLE,
        TOKEN_OPERATOR,
        N_TOKENS
};
%}
%option noyywrap

%%

select                  { return TOKEN_SELECT; }
","                     { return TOKEN_COMMA; }
\".*\"                  { return TOKEN_STRING; }
\'.*\'                  { return TOKEN_STRING; }
[0-9]+                  { return TOKEN_INTEGER; }
[0-9]+"."[0-9]*         { return TOKEN_DOUBLE; }
from                    { return TOKEN_FROM; }
where                   { return TOKEN_WHERE; }
and                     { return TOKEN_AND; }
or                      { return TOKEN_OR; }
"("                     { return TOKEN_OPAREN; }
")"                     { return TOKEN_CPAREN; }
[a-zA-Z][a-zA-Z0-9_.]*  { return TOKEN_IDENTIFIER; }
"+"|"-"|"*"|"/"|"="|""|"!="|">"|"<"|"="    {
                        return TOKEN_OPERATOR; }

[ \t\n]+                /* Eat Whitespace */

.                       printf("?%c?", yytext[0] );   /* Hmmm, what's this? */

%%
typedef enum {
        BEGIN_STATE,
        PARSE_SELECT,
        PARSE_FROM,
        PARSE_WHERE,
        END_STATE,
} parse_state;

typedef struct int_stack_s {
        int size;
        int top;
        int stack[1];  /*  Actually int stack[size];   ref: new_int_stack()  */
} int_stack_t;


int_stack_t *
new_int_stack(int size)
{
        int_stack_t *stack;

        if (stack = malloc(sizeof(int_stack_t) + (size - 1) * sizeof(int))) {
                stack->size = size;
                stack->top = 0;
        }
        return stack;
}


static int
push(int_stack_t *stack, int val)
{
        if (stack->top size) {
                stack->stack[stack->top++] = val;
                return stack->top;
        }
        return -1;
}


static int
pop(int_stack_t *stack, int *val) 
{
        if (stack->top > 0) {
                *val = stack->stack[--stack->top];
                return stack->top;
        }
        return -1;
}


ppsql(FILE *fp_in)
{
        int_stack_t *indent_stack = new_int_stack(255);
        parse_state state;
        int token_type, space_flag;
        int indent = 0, col = 0;

        yyin = fp_in;
        state = BEGIN_STATE;

        while (token_type = yylex()) {
                switch(token_type) {
                case TOKEN_SELECT:
                        fputs("select ", stdout);
                        indent = col;
                        col += 7;
                        space_flag = 0;
                        state = PARSE_SELECT;
                        break;
                case TOKEN_FROM:
                case TOKEN_WHERE:
                case TOKEN_AND:
                case TOKEN_OR:
                        fputc('\n', stdout);
                        for (col = 0; col < indent; ++col) { fputc(' ', stdout); }
                        if (col == 0) {
                                if (token_type == TOKEN_AND) {
                                        fputs("  ", stdout);
                                        col += 2;
                                } else if (token_type == TOKEN_OR) {
                                        fputs("   ", stdout);
                                        col += 3;
                                }
                        }
                        fputs(yytext, stdout);
                        fputc(' ', stdout);
                        col += (strlen(yytext) + 1);
                        space_flag = 0;
                        break;
                case TOKEN_OPAREN:
                        if (space_flag) { fputc(' ', stdout); }
                        push(indent_stack, indent);
                        fputs("( ", stdout);
                        col += (2 + space_flag);
                        indent = col;
                        space_flag = 0;
                        break;
                case TOKEN_CPAREN:
                        fputs(" )", stdout);
                        col += 2;
                        pop(indent_stack, &indent); 
                        space_flag = 1;
                        break;
                case TOKEN_COMMA:
                        /*fputs(((state == PARSE_SELECT) ? ",\n" : ", "), stdout);*/
                        fputs(", ", stdout);
                        col += 2;
                        space_flag = 0;
                        break;
                case TOKEN_OPERATOR:
                        fputc(' ', stdout);
                        fputs(yytext, stdout);
                        fputc(' ', stdout);
                        col += (strlen(yytext) + 2);
                        space_flag = 0;
                        break;
                default:
                        if (space_flag) { fputc(' ', stdout); }
                        fputs(yytext, stdout);
                        col += (strlen(yytext) + space_flag);
                        space_flag = 1;
                        break;
                }
        }
        printf("\n");
        return 0;
}


main( argc, argv )
int argc;
char **argv;
{
        int retcode;

        if (argc == 1) {
                retcode = ppsql(stdin);
        } else {
                int indx;
                FILE *fp;
                for (indx = 1; indx < argc; ++indx) {
                        if (fp = fopen(argv[indx], "r")) {
                                if (argc > 2) { printf("=== %s ===\n", argv[indx]); }
                                retcode = ppsql(fp);
                                fclose(fp);
                                if (retcode != 0)
                                        break;
                        }
                }
        }

        return retcode;
}


/*
:!lex % && cc -g lex.yy.c && rm -f lex.yy.c
:!echo 'select col1, string, (a+b)/c, literal from table , table2 where x = y and a = b or  c in (select distinct abc from def where alphabet = 1) and 1 =  1' | a.out

Sample SQL:
select distinct tran_schedule_delivery.  delivery_id, 455854
from tran_schedule_delivery, ab_tran, schedule_delivery_detail
where ( tran_schedule_delivery.delivery_id = schedule_delivery_detail.delivery_id
        and schedule_delivery_detail.deal_tracking_num = ab_tran.deal_tracking_num )
  and ( tran_schedule_delivery.volume_type <> 0
        and ( ( ab_tran.ins_type in ( select id_number
                                      from instruments
                                      where id_number in ( 45001, 45002, 45006, 45149, 45160 )
                                         or base_ins_id in ( 45001, 45002, 45006, 45149, 45160 ) )
                and ab_tran.buy_sell = 0 )
           or ( ab_tran.ins_type in ( select id_number
                                      from instruments
                                      where id_number in ( 45003, 45008, 45137, 45146 )
                                         or base_ins_id in ( 45003, 45008, 45137, 45146 ) ) )
           or ( ab_tran.ins_type in ( select id_number
                                      from instruments
                                      where id_number in ( 45001, 45002, 45006, 45149, 45160 )
                                         or base_ins_id in ( 45001, 45002, 45006, 45149, 45160 ) )
                and ab_tran.buy_sell = 1 ) )
        and tran_schedule_delivery.gmt_start_date_time  &tt; '01-jan-2003 00:00:00'
        and tran_schedule_delivery.volume_type in ( 4 , 6 ) )
  or not exists ( select *
                  from query_result qr2
                  where qr2.unique_id = 455854
                    and qr2.query_result = tran_schedule_delivery.delivery_id )
*/