Challenge

Following my dismal performance in the technical portion of an interview last week, I decided that I needed to regularly work on programming tasks in order to maintain my technical skills. Unfortunately most of the courses I have been taking have given me few opportunities to flex my coding skills, and my personal projects have been dwindling in need of attention. Without a current project to work on, I decided to go back to the pythonchallenge buth this time, trying Rust, which is new to me. However, while rust is capable of fairly low level access, it just doesn’t scratch the same itch as C, so after the first couple of challenges, I decided to try to solve one in C.

First, in Rust; opne a file of garbage and sort out the lower-case letters:

use std::io::BufReader;
use std::io::BufRead;
use std::fs::File;

fn main() 
{
        let f = File::open("garbage.txt").unwrap();
        let file = BufReader::new(&f);
        for line in file.lines() 
        {
                let l = line.unwrap();
                for c in l.as_bytes() 
                {
                print!("{}", if (*c as char).is_alphanumeric() { 
                                                 *c as char} else {0 as char});
                } 
        }
        print!("\n");
}       

And then the same problem solved in pure C:

#include<stdio.h>


int main( void ) {
        static const char filename[] = "garbage.txt";
        FILE *file = fopen ( filename, "r" );
        if ( file != NULL )
        {
        char line [ 128 ];
                while ( fgets ( line, sizeof line, file ) != NULL )
                {
                        for (int i=0; i<sizeof line; i++){
                                if (line[i] > 96 && line[i] < 123)
                                {
                                        printf("%c",line[i]);
                                } 
                        }
      }
      printf("\n");
      fclose ( file );
   }
   else
   {
      perror ( filename );
   }
   return 0;
}       

They are probably both horribly hacky, but they both do the job. I will try to continue this in the weeks ahead.

Edit:

I was intrigued by the performance differences between these two solutions, so I did a little testing and found that the hacky c solution scans 1250 lines of the input text in 0.005 seconds while the rust solution takes 1.282 seconds. This inspired me to write a c version that used a regex like the rust solution does:

#include<stdio.h>
#include<regex.h>
#include<stdlib.h>
#include<string.h>
#include<sys/types.h>


int find(char * pattern, char * input  ) {
  regex_t regex;
  int reti;
  reti = regcomp(&regex, pattern, REG_EXTENDED);
  regmatch_t matches[2];
  char result [ 9 ];
  int search;
  /* Execute regular expression */
    search = regexec(&regex, input, 2, matches, 0);
    if (search == REG_NOMATCH) {}
    else {
      for (int i = 0; matches[i].rm_so != -1; i++)
      {
        int len = matches[i].rm_eo - matches[i].rm_so;
        memcpy(result, input + matches[i].rm_so + 4, 1);
        result[len] = 0;
        printf("%s",result);
      }
      regfree(&regex);
      return 0;
    }

}

// a C regex solution
int main( void ) {
  char line [ 128 ];
        static const char filename[] = "garbage2.txt";
        FILE *file = fopen ( filename, "r" );
        if ( file != NULL )
        {
    char pattern[] = "[a-z][A-Z]{3}[a-z][A-Z]{3}[a-z]";
                while ( fgets ( line, sizeof line, file ) != NULL )
                {
      find(pattern, line);
    }
    fclose ( file );
    printf("\n");
   }
   else
   {
      perror ( filename );
   }
   return 0;
}

The c solution with regex is still significantly faster than rust clocking in 0.044 seconds. Then, I was inspired by a friend to try to optimize the plain c version further, using bit-masking as tool pattern match multiple chars at a time. This was slightly problematic since the pattern is nine chars long, and a long long can only mask 8 chars; the solution was to pattern match the first 8 chars of the pattern with one bitmask, then use another smaller bitmask to check the final char in the pattern. Unfortunately this yielded no improvement over the original, clunkier version.

#include<stdio.h>

int main( void ) {
        static const char filename[] = "garbage2.txt";
        FILE *file = fopen ( filename, "r" );
        if ( file != NULL )
        {
        char line [128];
                while ( fgets ( line, sizeof line, file ) != NULL )
                {
                        char c;
                        char *in = line;
                        long long int test = 0;
                        long long int p1 = 0x2020202020202020;
                        long long int p2 = 0x2000000020;
                        for (int i=0; i<sizeof(line) - 9; i++){
                                c = in[4];
                                test = in[0] | (in[1] << 8) | (in[2] << 16) | (in[3] << 24) | ((long long) in[4] << 32) | ((long long) in[5] << 40) | ((long long) in[6] << 48) | ((long long) in[7] << 56);
                                long long int m = test & p1;
                                test = m ^ p2;
                                int test2 = (int)in[8] & 0x20;
                                if ((!test) && (test2)){
                                        printf("%c", c );
                                }
                                in = &in[1];
                        }
   }
         printf("\n");
     fclose ( file );
   }
   else
   {
      perror ( filename );
   }
   return 0;
}

 

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google+ photo

You are commenting using your Google+ account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

w

Connecting to %s