a3238c20da27c3348d406066b595cd085e600558
   1/*
   2 * Totally braindamaged mbox splitter program.
   3 *
   4 * It just splits a mbox into a list of files: "0001" "0002" ..
   5 * so you can process them further from there.
   6 */
   7#include <unistd.h>
   8#include <stdlib.h>
   9#include <fcntl.h>
  10#include <sys/types.h>
  11#include <sys/stat.h>
  12#include <sys/mman.h>
  13#include <string.h>
  14#include <stdio.h>
  15#include <ctype.h>
  16#include <assert.h>
  17
  18static int usage(void)
  19{
  20        fprintf(stderr, "mailsplit <mbox> <directory>\n");
  21        exit(1);
  22}
  23
  24static int linelen(const char *map, unsigned long size)
  25{
  26        int len = 0, c;
  27
  28        do {
  29                c = *map;
  30                map++;
  31                size--;
  32                len++;
  33        } while (size && c != '\n');
  34        return len;
  35}
  36
  37static int is_from_line(const char *line, int len)
  38{
  39        const char *colon;
  40
  41        if (len < 20 || memcmp("From ", line, 5))
  42                return 0;
  43
  44        colon = line + len - 2;
  45        line += 5;
  46        for (;;) {
  47                if (colon < line)
  48                        return 0;
  49                if (*--colon == ':')
  50                        break;
  51        }
  52
  53        if (!isdigit(colon[-4]) ||
  54            !isdigit(colon[-2]) ||
  55            !isdigit(colon[-1]) ||
  56            !isdigit(colon[ 1]) ||
  57            !isdigit(colon[ 2]))
  58                return 0;
  59
  60        /* year */
  61        if (strtol(colon+3, NULL, 10) <= 90)
  62                return 0;
  63
  64        /* Ok, close enough */
  65        return 1;
  66}
  67
  68static int parse_email(const void *map, unsigned long size)
  69{
  70        unsigned long offset;
  71
  72        if (size < 6 || memcmp("From ", map, 5))
  73                goto corrupt;
  74
  75        /* Make sure we don't trigger on this first line */
  76        map++; size--; offset=1;
  77
  78        /*
  79         * Search for a line beginning with "From ", and 
  80         * having something that looks like a date format.
  81         */
  82        do {
  83                int len = linelen(map, size);
  84                if (is_from_line(map, len))
  85                        return offset;
  86                map += len;
  87                size -= len;
  88                offset += len;
  89        } while (size);
  90        return offset;
  91
  92corrupt:
  93        fprintf(stderr, "corrupt mailbox\n");
  94        exit(1);
  95}
  96
  97int main(int argc, char **argv)
  98{
  99        int fd, nr;
 100        struct stat st;
 101        unsigned long size;
 102        void *map;
 103
 104        if (argc != 3)
 105                usage();
 106        fd = open(argv[1], O_RDONLY);
 107        if (fd < 0) {
 108                perror(argv[1]);
 109                exit(1);
 110        }
 111        if (chdir(argv[2]) < 0)
 112                usage();
 113        if (fstat(fd, &st) < 0) {
 114                perror("stat");
 115                exit(1);
 116        }
 117        size = st.st_size;
 118        map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
 119        if (map == MAP_FAILED) {
 120                perror("mmap");
 121                close(fd);
 122                exit(1);
 123        }
 124        close(fd);
 125        nr = 0;
 126        do {
 127                char name[10];
 128                unsigned long len = parse_email(map, size);
 129                assert(len <= size);
 130                sprintf(name, "%04d", ++nr);
 131                fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
 132                if (fd < 0) {
 133                        perror(name);
 134                        exit(1);
 135                }
 136                if (write(fd, map, len) != len) {
 137                        perror("write");
 138                        exit(1);
 139                }
 140                close(fd);
 141                map += len;
 142                size -= len;
 143        } while (size > 0);
 144        return 0;
 145}