/* anagram.c - compute anagrams */ /* * Copyright (C) 1999 Roger Willcocks * rogerw@centipede.co.uk * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. */ /* * this is a fast anagram calculator. Given a dictionary file and a * phrase, it will find all the anagrams of the phrase. */ #include #include #include #define MAXWORD 10000 typedef struct _pattern { long x[4]; char *word; } pattern; const long mask = 0x8888888L; pattern phrase, word[MAXWORD]; int wordcount; pattern *limit; char buffer[30]; char *strsave(char *str) { char *ret = new char [strlen(str) + 1]; strcpy(ret, str); return ret; } extern "C" int callback(const void *p, const void *q) { pattern *pp = (pattern *)p; pattern *pq = (pattern *)q; int retval = strlen(pq->word) - strlen(pp->word); if (retval == 0) return strcmp(pp->word, pq->word); return retval; } char *soln[100]; int count; int trials, tests; void anagrate(pattern *pword, int depth) { if (depth == 4) return; if (phrase.x[0] == mask && phrase.x[1] == mask && phrase.x[2] == mask && phrase.x[3] == mask) { printf("%d (%d, %d) ", count++, trials, tests); for (int i = 0; i < depth; i++) printf("%s ", soln[i]); printf("\n"); return; } while (pword < limit) { if ((((phrase.x[0] - pword->x[0]) & mask) == mask) && (((phrase.x[1] - pword->x[1]) & mask) == mask) && (((phrase.x[2] - pword->x[2]) & mask) == mask) && (((phrase.x[3] - pword->x[3]) & mask) == mask)) { soln[depth] = pword->word; pattern tt = phrase; phrase.x[0] = tt.x[0] - pword->x[0]; /* -= would get optimised to use remembered */ phrase.x[1] = tt.x[1] - pword->x[1]; /* result from tests above; not wanted since */ phrase.x[2] = tt.x[2] - pword->x[2]; /* most tests (~ 39/40) will fail */ phrase.x[3] = tt.x[3] - pword->x[3]; anagrate(pword, depth + 1); phrase = tt; trials++; } pword++; tests++; } } void addchar(pattern *pp, int c) { if (c >= 'a' && c <= 'g') { pp->x[0] += (1L << (4 * (c - 'a'))); } else if (c >= 'h' && c <= 'n') { pp->x[1] += (1L << (4 * (c - 'h'))); } else if (c >= 'o' && c <= 'u') { pp->x[2] += (1L << (4 * (c - 'o'))); } else if (c >= 'v' && c <= 'z') { pp->x[3] += (1L << (4 * (c - 'v'))); } if ((pp->x[1] & mask) || (pp->x[1] & mask) || (pp->x[2] & mask) || (pp->x[3] & mask)) { fprintf(stderr, "too many '%c's in phrase\n", c); } } int main(int argc, char *argv[]) { // string is encoded as 26 * 4-bit fields // held internally as 4 * 32-bit ints // top bits of each set of four are 1, next three are count for that letter // subtract words - if top bit is still one, all's cool, otherwise // something underflowed and we can't use this word if (argc != 3) { fprintf(stderr, "usage: anagram \n"); return -4; } FILE *f = fopen(argv[1], "r"); char *p; if (f == 0) { fprintf(stderr, "can't open dictionary file '%s'\n", argv[1]); return -3; } p = argv[2]; while (*p) addchar(&phrase, *p++); phrase.x[0] |= mask; phrase.x[1] |= mask; phrase.x[2] |= mask; phrase.x[3] |= mask; while (fgets(buffer, sizeof(buffer), f)) { p = buffer; while (*p == ' ') p++; if (*p == '\n') continue; pattern tpat; tpat.x[0] = 0; tpat.x[1] = 0; tpat.x[2] = 0; tpat.x[3] = 0; while (*p && *p != '\n') addchar(&tpat, *p++); *p = 0; if ((((phrase.x[0] - tpat.x[0]) & mask) == mask) && (((phrase.x[1] - tpat.x[1]) & mask) == mask) && (((phrase.x[2] - tpat.x[2]) & mask) == mask) && (((phrase.x[3] - tpat.x[3]) & mask) == mask)) { tpat.word = strsave(buffer); /* word is subset of phrase */ word[wordcount++] = tpat; /* so keep it */ } } fclose(f); qsort(word, wordcount, sizeof(pattern), &callback); for (int x = 0; x < wordcount; x++) printf("%s ", word[x].word); printf("\n\n%d words\n", wordcount); limit = &word[wordcount]; anagrate(&word[0], 0); return 0; } /* end */