Print this page
2944 cscope-fast/invlib.[ch] use reserved word as identifier
Reviewed by: Albert Lee <trisk@nexenta.com>
Reviewed by: Gary Mills <gary_mills@fastmail.fm>
Reviewed by: Joerg Schilling <Joerg.Schilling@fokus.fraunhofer.de>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/tools/cscope-fast/invlib.c
+++ new/usr/src/tools/cscope-fast/invlib.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License, Version 1.0 only
6 6 * (the "License"). You may not use this file except in compliance
7 7 * with the License.
8 8 *
9 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 10 * or http://www.opensolaris.org/os/licensing.
11 11 * See the License for the specific language governing permissions
12 12 * and limitations under the License.
13 13 *
14 14 * When distributing Covered Code, include this CDDL HEADER in each
15 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 16 * If applicable, add the following below this CDDL HEADER, with the
17 17 * fields enclosed by brackets "[]" replaced with your own identifying
18 18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 19 *
20 20 * CDDL HEADER END
↓ open down ↓ |
20 lines elided |
↑ open up ↑ |
21 21 */
22 22 /* Copyright (c) 1988 AT&T */
23 23 /* All Rights Reserved */
24 24
25 25
26 26 /*
27 27 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
28 28 * Use is subject to license terms.
29 29 */
30 30
31 -#pragma ident "%Z%%M% %I% %E% SMI"
32 -
33 31 #include <ctype.h>
34 32 #include <stdio.h>
35 33 #include <sys/types.h>
36 34 #include <sys/sysmacros.h>
37 35 #include <sys/byteorder.h>
38 36 #if SHARE
39 37 #include <sys/ipc.h>
40 38 #include <sys/shm.h>
41 39 #define ERR -1
42 40 #endif
43 41 #include "invlib.h"
44 42 #include "library.h"
45 43
46 44 #define DEBUG 0 /* debugging code and realloc messages */
47 45 #define BLOCKSIZE 2 * BUFSIZ /* logical block size */
48 46 #define LINEMAX 1000 /* sorted posting line max size */
49 47 #define POSTINC 10000 /* posting buffer size increment */
50 48 #define SEP ' ' /* sorted posting field separator */
51 49 #define SETINC 100 /* posting set size increment */
52 50 #define STATS 0 /* print statistics */
53 51 #define SUPERINC 10000 /* super index size increment */
54 52 #define TERMMAX 512 /* term max size */
55 53 #define VERSION 1 /* inverted index format version */
56 54 #define ZIPFSIZE 200 /* zipf curve size */
57 55 #define FREAD "r" /* fopen for reading */
58 56 #define FREADP "r+" /* fopen for update */
59 57 #define FWRITE "w" /* fopen truncate or create for writing */
60 58 #define FWRITEP "w+" /* fopen truncate or create for update */
61 59
62 60 extern char *argv0; /* command name (must be set in main function) */
63 61
64 62 int invbreak;
65 63
66 64 #if STATS
67 65 int showzipf; /* show postings per term distribution */
68 66 #endif
69 67
70 68 static POSTING *item, *enditem, *item1 = NULL, *item2 = NULL;
71 69 static unsigned setsize1, setsize2;
72 70 static long numitems, totterm, zerolong;
73 71 static char *indexfile, *postingfile;
74 72 static FILE *outfile, *fpost;
75 73 static unsigned supersize = SUPERINC, supintsize;
76 74 static int numpost, numlogblk, amtused, nextpost,
77 75 lastinblk, numinvitems;
78 76 static POSTING *POST, *postptr;
79 77 static unsigned long *SUPINT, *supint, nextsupfing;
80 78 static char *SUPFING, *supfing;
81 79 static char thisterm[TERMMAX];
82 80 static union {
83 81 long invblk[BLOCKSIZE / sizeof (long)];
84 82 char chrblk[BLOCKSIZE];
85 83 } logicalblk;
86 84
87 85 #if DEBUG || STATS
88 86 static long totpost;
89 87 #endif
90 88
91 89 #if STATS
92 90 static int zipf[ZIPFSIZE + 1];
93 91 #endif
94 92
95 93 static void invcannotalloc(size_t n);
96 94 static void invcannotopen(char *file);
97 95 static void invcannotwrite(char *file);
98 96 static int invnewterm(void);
99 97 static int boolready(void);
100 98
101 99 long
102 100 invmake(char *invname, char *invpost, FILE *infile)
103 101 {
104 102 unsigned char *s;
105 103 long num;
106 104 int i;
107 105 long fileindex;
108 106 unsigned postsize = POSTINC * sizeof (POSTING);
109 107 unsigned long *intptr;
110 108 char line[LINEMAX];
111 109 long tlong;
112 110 PARAM param;
113 111 POSTING posting;
114 112 #if STATS
115 113 int j;
116 114 unsigned maxtermlen = 0;
117 115 #endif
118 116 /* output file */
119 117 if ((outfile = vpfopen(invname, FWRITEP)) == NULL) {
120 118 invcannotopen(invname);
121 119 return (0);
122 120 }
123 121 indexfile = invname;
124 122 (void) fseek(outfile, (long)BUFSIZ, 0);
125 123
126 124 /* posting file */
127 125 if ((fpost = vpfopen(invpost, FWRITE)) == NULL) {
128 126 invcannotopen(invpost);
129 127 return (0);
130 128 }
131 129 postingfile = invpost;
132 130 nextpost = 0;
133 131 /* get space for the postings list */
134 132 if ((POST = (POSTING *)malloc(postsize)) == NULL) {
135 133 invcannotalloc(postsize);
136 134 return (0);
137 135 }
138 136 postptr = POST;
139 137 /* get space for the superfinger (superindex) */
140 138 if ((SUPFING = malloc(supersize)) == NULL) {
141 139 invcannotalloc(supersize);
142 140 return (0);
143 141 }
144 142 supfing = SUPFING;
145 143 supintsize = supersize / 40;
146 144 /* also for the superfinger index */
147 145 if ((SUPINT = malloc(supintsize * sizeof (long))) == NULL) {
148 146 invcannotalloc(supintsize * sizeof (long));
149 147 return (0);
150 148 }
151 149 supint = SUPINT;
152 150 supint++; /* leave first term open for a count */
153 151 /* initialize using an empty term */
154 152 (void) strcpy(thisterm, "");
155 153 *supint++ = 0;
156 154 *supfing++ = ' ';
157 155 *supfing++ = '\0';
158 156 nextsupfing = 2;
159 157 #if DEBUG || STATS
160 158 totpost = 0L;
161 159 #endif
162 160 totterm = 0L;
163 161 numpost = 1;
164 162
165 163 /*
166 164 * set up as though a block had come and gone, i.e., set up for
167 165 * new block
168 166 */
169 167 amtused = 16; /* leave no space - init 3 words + one for luck */
170 168 numinvitems = 0;
171 169 numlogblk = 0;
172 170 lastinblk = BLOCKSIZE;
173 171
174 172 /* now loop as long as more to read (till eof) */
175 173 while (fgets(line, LINEMAX, infile) != NULL) {
176 174 #if DEBUG || STATS
177 175 ++totpost;
178 176 #endif
179 177 s = (unsigned char *) strchr(line, SEP);
180 178 if (s == NULL) /* where did this line come from ??? */
181 179 continue; /* workaround: just skip it */
182 180 *s = '\0';
183 181 #if STATS
184 182 if ((i = strlen(line)) > maxtermlen) {
185 183 maxtermlen = i;
186 184 }
187 185 #endif
188 186 #if DEBUG
189 187 (void) printf("%ld: %s ", totpost, line);
190 188 (void) fflush(stdout);
191 189 #endif
192 190 if (strcmp(thisterm, line) == 0) {
193 191 if (postptr + 10 > POST + postsize / sizeof (POSTING)) {
194 192 i = postptr - POST;
195 193 postsize += POSTINC * sizeof (POSTING);
196 194 if ((POST = realloc(POST, postsize)) == NULL) {
197 195 invcannotalloc(postsize);
198 196 return (0);
199 197 }
200 198 postptr = i + POST;
201 199 #if DEBUG
202 200 (void) printf("reallocated post space to %u, "
203 201 "totpost=%ld\n", postsize, totpost);
204 202 #endif
205 203 }
206 204 numpost++;
207 205 } else {
208 206 /* have a new term */
209 207 if (!invnewterm()) {
210 208 return (0);
211 209 }
212 210 (void) strcpy(thisterm, line);
213 211 numpost = 1;
214 212 postptr = POST;
215 213 fileindex = 0;
216 214 }
217 215 /* get the new posting */
218 216 num = *++s - '!';
219 217 i = 1;
220 218 do {
221 219 num = BASE * num + *++s - '!';
222 220 } while (++i < PRECISION);
223 221 posting.lineoffset = num;
224 222 while (++fileindex < nsrcoffset && num > srcoffset[fileindex]) {
225 223 ;
226 224 }
227 225 posting.fileindex = --fileindex;
228 226 posting.type = *++s;
229 227 num = *++s - '!';
230 228 if (*s != '\n') {
231 229 num = *++s - '!';
232 230 while (*++s != '\n') {
233 231 num = BASE * num + *s - '!';
234 232 }
235 233 posting.fcnoffset = num;
236 234 } else {
237 235 posting.fcnoffset = 0;
238 236 }
239 237 *postptr++ = posting;
240 238 #if DEBUG
241 239 (void) printf("%ld %ld %ld %ld\n", posting.fileindex,
242 240 posting.fcnoffset, posting.lineoffset, posting.type);
243 241 (void) fflush(stdout);
244 242 #endif
245 243 }
246 244 if (!invnewterm()) {
247 245 return (0);
248 246 }
249 247 /* now clean up final block */
250 248 logicalblk.invblk[0] = numinvitems;
251 249 /* loops pointer around to start */
252 250 logicalblk.invblk[1] = 0;
253 251 logicalblk.invblk[2] = numlogblk - 1;
254 252 if (fwrite((char *)&logicalblk, BLOCKSIZE, 1, outfile) == 0) {
255 253 goto cannotwrite;
256 254 }
257 255 numlogblk++;
258 256 /* write out block to save space. what in it doesn't matter */
259 257 if (fwrite((char *)&logicalblk, BLOCKSIZE, 1, outfile) == 0) {
260 258 goto cannotwrite;
261 259 }
262 260 /* finish up the super finger */
263 261 *SUPINT = numlogblk;
264 262 /* add to the offsets the size of the offset pointers */
265 263 intptr = (SUPINT + 1);
266 264 i = (char *)supint - (char *)SUPINT;
267 265 while (intptr < supint)
268 266 *intptr++ += i;
269 267 /* write out the offsets (1 for the N at start) and the super finger */
270 268 if (fwrite((char *)SUPINT, sizeof (*SUPINT), numlogblk + 1,
271 269 outfile) == 0 ||
272 270 fwrite(SUPFING, 1, supfing - SUPFING, outfile) == 0) {
273 271 goto cannotwrite;
274 272 }
275 273 /* save the size for reference later */
276 274 nextsupfing = sizeof (long) + sizeof (long) * numlogblk +
277 275 (supfing - SUPFING);
278 276 /*
279 277 * make sure the file ends at a logical block boundary. This is
280 278 * necessary for invinsert to correctly create extended blocks
281 279 */
282 280 i = nextsupfing % BLOCKSIZE;
283 281 /* write out junk to fill log blk */
284 282 if (fwrite(SUPFING, BLOCKSIZE - i, 1, outfile) == 0 ||
285 283 fflush(outfile) == EOF) {
286 284 /* rewind doesn't check for write failure */
287 285 goto cannotwrite;
288 286 }
289 287 /* write the control area */
290 288 rewind(outfile);
291 289 param.version = VERSION;
292 290 param.filestat = 0;
293 291 param.sizeblk = BLOCKSIZE;
294 292 param.startbyte = (numlogblk + 1) * BLOCKSIZE + BUFSIZ;
295 293 param.supsize = nextsupfing;
296 294 param.cntlsize = BUFSIZ;
297 295 param.share = 0;
298 296 if (fwrite((char *)¶m, sizeof (param), 1, outfile) == 0) {
299 297 goto cannotwrite;
300 298 }
301 299 for (i = 0; i < 10; i++) /* for future use */
302 300 if (fwrite((char *)&zerolong, sizeof (zerolong),
303 301 1, outfile) == 0) {
304 302 goto cannotwrite;
305 303 }
306 304
307 305 /* make first block loop backwards to last block */
308 306 if (fflush(outfile) == EOF) {
309 307 /* fseek doesn't check for write failure */
310 308 goto cannotwrite;
311 309 }
312 310 /* get to second word first block */
313 311 (void) fseek(outfile, (long)BUFSIZ + 8, 0);
314 312 tlong = numlogblk - 1;
315 313 if (fwrite((char *)&tlong, sizeof (tlong), 1, outfile) == 0 ||
316 314 fclose(outfile) == EOF) {
317 315 cannotwrite:
318 316 invcannotwrite(invname);
319 317 return (0);
320 318 }
321 319 if (fclose(fpost) == EOF) {
322 320 invcannotwrite(postingfile);
323 321 return (0);
324 322 }
325 323 --totterm; /* don't count null term */
326 324 #if STATS
327 325 (void) printf("logical blocks = %d, postings = %ld, terms = %ld, "
328 326 "max term length = %d\n", numlogblk, totpost, totterm, maxtermlen);
329 327 if (showzipf) {
330 328 (void) printf(
331 329 "\n************* ZIPF curve ****************\n");
332 330 for (j = ZIPFSIZE; j > 1; j--)
333 331 if (zipf[j])
334 332 break;
335 333 for (i = 1; i < j; ++i) {
336 334 (void) printf("%3d -%6d ", i, zipf[i]);
337 335 if (i % 6 == 0) (void) putchar('\n');
338 336 }
339 337 (void) printf(">%d-%6d\n", ZIPFSIZE, zipf[0]);
340 338 }
341 339 #endif
342 340 /* free all malloc'd memory */
343 341 free(POST);
344 342 free(SUPFING);
345 343 free(SUPINT);
346 344 return (totterm);
347 345 }
348 346
349 347 /* add a term to the data base */
350 348
351 349 static int
352 350 invnewterm(void)
353 351 {
354 352 int backupflag, i, j, maxback, holditems, gooditems, howfar;
355 353 int len, numwilluse, wdlen;
356 354 char *tptr, *tptr2, *tptr3;
357 355 union {
358 356 unsigned long packword[2];
359 357 ENTRY e;
360 358 } iteminfo;
361 359
362 360 totterm++;
363 361 #if STATS
364 362 /* keep zipfian info on the distribution */
365 363 if (numpost <= ZIPFSIZE)
366 364 zipf[numpost]++;
367 365 else
368 366 zipf[0]++;
369 367 #endif
370 368 len = strlen(thisterm);
371 369 wdlen = (len + (sizeof (long) - 1)) / sizeof (long);
372 370 numwilluse = (wdlen + 3) * sizeof (long);
373 371 /* new block if at least 1 item in block */
374 372 if (numinvitems && numwilluse + amtused > BLOCKSIZE) {
375 373 /* set up new block */
376 374 if (supfing + 500 > SUPFING + supersize) {
377 375 i = supfing - SUPFING;
378 376 supersize += 20000;
379 377 if ((SUPFING = realloc(SUPFING, supersize)) == NULL) {
380 378 invcannotalloc(supersize);
381 379 return (0);
382 380 }
383 381 supfing = i + SUPFING;
384 382 #if DEBUG
385 383 (void) printf("reallocated superfinger space to %d, "
386 384 "totpost=%ld\n", supersize, totpost);
387 385 #endif
388 386 }
389 387 /* check that room for the offset as well */
390 388 if ((numlogblk + 10) > supintsize) {
391 389 i = supint - SUPINT;
392 390 supintsize += SUPERINC;
393 391 if ((SUPINT = realloc((char *)SUPINT,
394 392 supintsize * sizeof (long))) == NULL) {
395 393 invcannotalloc(supintsize * sizeof (long));
396 394 return (0);
397 395 }
398 396 supint = i + SUPINT;
399 397 #if DEBUG
400 398 (void) printf("reallocated superfinger offset to %d, "
401 399 "totpost = %ld\n", supintsize * sizeof (long),
402 400 totpost);
403 401 #endif
404 402 }
405 403 /* See if backup is efficatious */
406 404 backupflag = 0;
407 405 maxback = strlen(thisterm) / 10;
408 406 holditems = numinvitems;
409 407 if (maxback > numinvitems)
410 408 maxback = numinvitems - 2;
411 409 howfar = 0;
412 410 while (--maxback > 0) {
413 411 howfar++;
414 412 iteminfo.packword[0] =
415 413 logicalblk.invblk[--holditems * 2 +
416 414 (sizeof (long) - 1)];
417 415 if ((i = iteminfo.e.size / 10) < maxback) {
418 416 maxback = i;
419 417 backupflag = howfar;
420 418 gooditems = holditems;
421 419 tptr2 = logicalblk.chrblk + iteminfo.e.offset;
422 420 }
423 421 }
424 422 /* see if backup will occur */
425 423 if (backupflag) {
426 424 numinvitems = gooditems;
427 425 }
428 426 logicalblk.invblk[0] = numinvitems;
429 427 /* set forward pointer pointing to next */
430 428 logicalblk.invblk[1] = numlogblk + 1;
431 429 /* set back pointer to last block */
432 430 logicalblk.invblk[2] = numlogblk - 1;
433 431 if (fwrite((char *)logicalblk.chrblk, 1,
434 432 BLOCKSIZE, outfile) == 0) {
435 433 invcannotwrite(indexfile);
436 434 return (0);
437 435 }
438 436 amtused = 16;
439 437 numlogblk++;
440 438 /* check if had to back up, if so do it */
441 439 if (backupflag) {
442 440 /* find out where the end of the new block is */
443 441 iteminfo.packword[0] =
444 442 logicalblk.invblk[numinvitems * 2 + 1];
445 443 tptr3 = logicalblk.chrblk + iteminfo.e.offset;
446 444 /* move the index for this block */
447 445 for (i = 3; i <= (backupflag * 2 + 2); i++) {
448 446 logicalblk.invblk[i] =
449 447 logicalblk.invblk[numinvitems * 2+i];
450 448 }
451 449 /* move the word into the super index */
452 450 iteminfo.packword[0] = logicalblk.invblk[3];
453 451 iteminfo.packword[1] = logicalblk.invblk[4];
454 452 tptr2 = logicalblk.chrblk + iteminfo.e.offset;
455 453 (void) strncpy(supfing, tptr2, (int)iteminfo.e.size);
456 454 *(supfing + iteminfo.e.size) = '\0';
457 455 #if DEBUG
458 456 (void) printf("backup %d at term=%s to term=%s\n",
459 457 backupflag, thisterm, supfing);
460 458 #endif
461 459 *supint++ = nextsupfing;
462 460 nextsupfing += strlen(supfing) + 1;
463 461 supfing += strlen(supfing) + 1;
464 462 /* now fix up the logical block */
465 463 tptr = logicalblk.chrblk + lastinblk;
466 464 lastinblk = BLOCKSIZE;
467 465 tptr2 = logicalblk.chrblk + lastinblk;
468 466 j = tptr3 - tptr;
469 467 while (tptr3 > tptr)
470 468 *--tptr2 = *--tptr3;
471 469 lastinblk -= j;
472 470 amtused += (8 * backupflag + j);
473 471 for (i = 3; i < (backupflag * 2 + 2); i += 2) {
474 472 iteminfo.packword[0] = logicalblk.invblk[i];
475 473 iteminfo.e.offset += (tptr2 - tptr3);
476 474 logicalblk.invblk[i] = iteminfo.packword[0];
477 475 }
478 476 numinvitems = backupflag;
479 477 } else { /* no backup needed */
480 478 numinvitems = 0;
481 479 lastinblk = BLOCKSIZE;
482 480 /* add new term to superindex */
483 481 (void) strcpy(supfing, thisterm);
484 482 supfing += strlen(thisterm) + 1;
485 483 *supint++ = nextsupfing;
486 484 nextsupfing += strlen(thisterm) + 1;
487 485 }
488 486 }
489 487 lastinblk -= (numwilluse - 8);
490 488 iteminfo.e.offset = lastinblk;
491 489 iteminfo.e.size = (char)len;
492 490 iteminfo.e.space = 0;
493 491 iteminfo.e.post = numpost;
494 492 (void) strncpy(logicalblk.chrblk + lastinblk, thisterm, len);
495 493 amtused += numwilluse;
496 494 logicalblk.invblk[(lastinblk/sizeof (long))+wdlen] = nextpost;
497 495 if ((i = postptr - POST) > 0) {
498 496 if (fwrite((char *)POST, sizeof (POSTING), i, fpost) == 0) {
499 497 invcannotwrite(postingfile);
500 498 return (0);
501 499 }
502 500 nextpost += i * sizeof (POSTING);
503 501 }
504 502 logicalblk.invblk[3+2*numinvitems++] = iteminfo.packword[0];
505 503 logicalblk.invblk[2+2*numinvitems] = iteminfo.packword[1];
506 504 return (1);
507 505 }
508 506
509 507 static void
510 508 swap_ints(void *p, size_t sz)
511 509 {
512 510 uint32_t *s;
513 511 uint32_t *e = (uint32_t *)p + (sz / sizeof (uint32_t));
514 512
515 513 for (s = p; s < e; s++)
516 514 *s = BSWAP_32(*s);
517 515 }
518 516
519 517 static void
520 518 write_param(INVCONTROL *invcntl)
521 519 {
522 520 if (invcntl->swap)
523 521 swap_ints(&invcntl->param, sizeof (invcntl->param));
524 522
525 523 rewind(invcntl->invfile);
526 524 (void) fwrite((char *)&invcntl->param, sizeof (invcntl->param), 1,
527 525 invcntl->invfile);
528 526
529 527 if (invcntl->swap)
530 528 swap_ints(&invcntl->param, sizeof (invcntl->param));
531 529 }
532 530
533 531 static void
534 532 read_superfinger(INVCONTROL *invcntl)
535 533 {
536 534 size_t count;
537 535
538 536 (void) fseek(invcntl->invfile, invcntl->param.startbyte, SEEK_SET);
539 537 (void) fread(invcntl->iindex, (int)invcntl->param.supsize,
540 538 1, invcntl->invfile);
541 539
542 540 if (invcntl->swap) {
543 541 /*
544 542 * The superfinger consists of a count, followed by
545 543 * count offsets, followed by a string table (which
546 544 * the offsets reference).
547 545 *
548 546 * We need to swap the count and the offsets.
549 547 */
550 548 count = 1 + BSWAP_32(*(uint32_t *)invcntl->iindex);
551 549 swap_ints(invcntl->iindex, count * sizeof (unsigned long));
552 550 }
553 551 }
554 552
555 553 static void
556 554 read_logblock(INVCONTROL *invcntl, int block)
557 555 {
558 556 /* note always fetch it if the file is busy */
559 557 if ((block != invcntl->numblk) ||
560 558 (invcntl->param.filestat >= INVBUSY)) {
561 559 (void) fseek(invcntl->invfile,
562 560 (block * invcntl->param.sizeblk) + invcntl->param.cntlsize,
563 561 SEEK_SET);
564 562 invcntl->numblk = block;
565 563 (void) fread(invcntl->logblk, (int)invcntl->param.sizeblk,
566 564 1, invcntl->invfile);
567 565
568 566 if (invcntl->swap) {
569 567 size_t count;
570 568 ENTRY *ecur, *eend;
571 569 uint32_t *postptr;
572 570
573 571 /*
574 572 * A logblock consists of a count, a next block id,
575 573 * and a previous block id, followed by count
576 574 * ENTRYs, followed by alternating strings and
577 575 * offsets.
578 576 */
579 577 swap_ints(invcntl->logblk, 3 * sizeof (unsigned long));
580 578
581 579 count = *(uint32_t *)invcntl->logblk;
582 580
583 581 ecur = (ENTRY *)((uint32_t *)invcntl->logblk + 3);
584 582 eend = ecur + count;
585 583
586 584 for (; ecur < eend; ecur++) {
587 585 ecur->offset = BSWAP_16(ecur->offset);
588 586 ecur->post = BSWAP_32(ecur->post);
589 587
590 588 /*
591 589 * After the string is the posting offset.
592 590 */
593 591 postptr = (uint32_t *)(invcntl->logblk +
594 592 ecur->offset +
595 593 P2ROUNDUP(ecur->size, sizeof (long)));
596 594
597 595 *postptr = BSWAP_32(*postptr);
598 596 }
599 597 }
600 598 }
601 599 }
602 600
603 601 void
604 602 read_next_posting(INVCONTROL *invcntl, POSTING *posting)
605 603 {
606 604 (void) fread((char *)posting, sizeof (*posting), 1, invcntl->postfile);
607 605 if (invcntl->swap) {
608 606 posting->lineoffset = BSWAP_32(posting->lineoffset);
609 607 posting->fcnoffset = BSWAP_32(posting->fcnoffset);
610 608 /*
611 609 * fileindex is a 24-bit field, so shift it before swapping
612 610 */
613 611 posting->fileindex = BSWAP_32(posting->fileindex << 8);
614 612 }
615 613 }
616 614
617 615 int
618 616 invopen(INVCONTROL *invcntl, char *invname, char *invpost, int stat)
619 617 {
620 618 int read_index;
621 619
622 620 if ((invcntl->invfile = vpfopen(invname,
623 621 ((stat == 0) ? FREAD : FREADP))) == NULL) {
624 622 invcannotopen(invname);
625 623 return (-1);
626 624 }
627 625 if (fread((char *)&invcntl->param, sizeof (invcntl->param), 1,
628 626 invcntl->invfile) == 0) {
629 627 (void) fprintf(stderr, "%s: empty inverted file\n", argv0);
630 628 goto closeinv;
631 629 }
632 630 if (invcntl->param.version != VERSION &&
633 631 BSWAP_32(invcntl->param.version) != VERSION) {
634 632 (void) fprintf(stderr,
635 633 "%s: cannot read old index format; use -U option to "
636 634 "force database to rebuild\n", argv0);
637 635 goto closeinv;
638 636 }
639 637 invcntl->swap = (invcntl->param.version != VERSION);
640 638 if (invcntl->swap)
641 639 swap_ints(&invcntl->param, sizeof (invcntl->param));
642 640
643 641 if (stat == 0 && invcntl->param.filestat == INVALONE) {
644 642 (void) fprintf(stderr, "%s: inverted file is locked\n", argv0);
645 643 goto closeinv;
646 644 }
647 645 if ((invcntl->postfile = vpfopen(invpost,
648 646 ((stat == 0) ? FREAD : FREADP))) == NULL) {
649 647 invcannotopen(invpost);
650 648 goto closeinv;
651 649 }
652 650 /* allocate core for a logical block */
653 651 if ((invcntl->logblk = malloc(invcntl->param.sizeblk)) == NULL) {
654 652 invcannotalloc((unsigned)invcntl->param.sizeblk);
655 653 goto closeboth;
656 654 }
657 655 /* allocate for and read in superfinger */
658 656 read_index = 1;
659 657 invcntl->iindex = NULL;
660 658 #if SHARE
661 659 if (invcntl->param.share == 1) {
662 660 key_t ftok(), shm_key;
663 661 struct shmid_ds shm_buf;
664 662 char *shmat();
665 663 int shm_id;
666 664
667 665 /* see if the shared segment exists */
668 666 shm_key = ftok(invname, 2);
669 667 shm_id = shmget(shm_key, 0, 0);
670 668 /*
671 669 * Failure simply means (hopefully) that segment doesn't
672 670 * exist
673 671 */
674 672 if (shm_id == -1) {
675 673 /*
676 674 * Have to give general write permission due to AMdahl
677 675 * not having protected segments
678 676 */
679 677 shm_id = shmget(shm_key,
680 678 invcntl->param.supsize + sizeof (long),
681 679 IPC_CREAT | 0666);
682 680 if (shm_id == -1)
683 681 perror("Could not create shared "
684 682 "memory segment");
685 683 } else
686 684 read_index = 0;
687 685
688 686 if (shm_id != -1) {
689 687 invcntl->iindex = shmat(shm_id, 0,
690 688 ((read_index) ? 0 : SHM_RDONLY));
691 689 if (invcntl->iindex == (char *)ERR) {
692 690 (void) fprintf(stderr,
693 691 "%s: shared memory link failed\n", argv0);
694 692 invcntl->iindex = NULL;
695 693 read_index = 1;
696 694 }
697 695 }
698 696 }
699 697 #endif
700 698 if (invcntl->iindex == NULL)
701 699 invcntl->iindex = malloc(invcntl->param.supsize + 16);
702 700 if (invcntl->iindex == NULL) {
703 701 invcannotalloc((unsigned)invcntl->param.supsize);
704 702 free(invcntl->logblk);
705 703 goto closeboth;
706 704 }
707 705 if (read_index) {
708 706 read_superfinger(invcntl);
709 707 }
710 708 invcntl->numblk = -1;
711 709 if (boolready() == -1) {
712 710 closeboth:
713 711 (void) fclose(invcntl->postfile);
714 712 closeinv:
715 713 (void) fclose(invcntl->invfile);
716 714 return (-1);
717 715 }
718 716 /* write back out the control block if anything changed */
719 717 invcntl->param.filestat = stat;
720 718 if (stat > invcntl->param.filestat)
721 719 write_param(invcntl);
722 720 return (1);
723 721 }
724 722
725 723 /* invclose must be called to wrap things up and deallocate core */
726 724 void
727 725 invclose(INVCONTROL *invcntl)
728 726 {
729 727 /* write out the control block in case anything changed */
730 728 if (invcntl->param.filestat > 0) {
731 729 invcntl->param.filestat = 0;
732 730 write_param(invcntl);
733 731 }
734 732 (void) fclose(invcntl->invfile);
735 733 (void) fclose(invcntl->postfile);
736 734 #if SHARE
737 735 if (invcntl->param.share > 0) {
738 736 shmdt(invcntl->iindex);
739 737 invcntl->iindex = NULL;
740 738 }
741 739 #endif
742 740 if (invcntl->iindex != NULL)
743 741 free(invcntl->iindex);
744 742 free(invcntl->logblk);
745 743 }
746 744
747 745 /* invstep steps the inverted file forward one item */
748 746 void
749 747 invstep(INVCONTROL *invcntl)
750 748 {
751 749 if (invcntl->keypnt < (*(int *)invcntl->logblk - 1)) {
752 750 invcntl->keypnt++;
753 751 return;
754 752 }
755 753
756 754 /* move forward a block else wrap */
757 755 read_logblock(invcntl, *(int *)(invcntl->logblk + sizeof (long)));
758 756
759 757 invcntl->keypnt = 0;
760 758 }
761 759
762 760 /* invforward moves forward one term in the inverted file */
763 761 int
764 762 invforward(INVCONTROL *invcntl)
765 763 {
766 764 invstep(invcntl);
767 765 /* skip things with 0 postings */
768 766 while (((ENTRY *)(invcntl->logblk + 12) + invcntl->keypnt)->post == 0) {
769 767 invstep(invcntl);
770 768 }
771 769 /* Check for having wrapped - reached start of inverted file! */
772 770 if ((invcntl->numblk == 0) && (invcntl->keypnt == 0))
773 771 return (0);
774 772 return (1);
775 773 }
776 774
777 775 /* invterm gets the present term from the present logical block */
778 776 int
779 777 invterm(INVCONTROL *invcntl, char *term)
780 778 {
781 779 ENTRY * entryptr;
782 780
783 781 entryptr = (ENTRY *)(invcntl->logblk + 12) + invcntl->keypnt;
784 782 (void) strncpy(term, entryptr->offset + invcntl->logblk,
785 783 (int)entryptr->size);
786 784 *(term + entryptr->size) = '\0';
787 785 return (entryptr->post);
788 786 }
789 787
790 788 /* invfind searches for an individual item in the inverted file */
791 789 long
792 790 invfind(INVCONTROL *invcntl, char *searchterm)
793 791 {
794 792 int imid, ilow, ihigh;
795 793 long num;
796 794 int i;
797 795 unsigned long *intptr, *intptr2;
798 796 ENTRY *entryptr;
799 797
800 798 /* make sure it is initialized via invready */
801 799 if (invcntl->invfile == 0)
802 800 return (-1L);
803 801
804 802 /* now search for the appropriate finger block */
805 803 intptr = (unsigned long *)invcntl->iindex;
806 804
807 805 ilow = 0;
808 806 ihigh = *intptr++ - 1;
809 807 while (ilow <= ihigh) {
810 808 imid = (ilow + ihigh) / 2;
811 809 intptr2 = intptr + imid;
812 810 i = strcmp(searchterm, (invcntl->iindex + *intptr2));
813 811 if (i < 0)
814 812 ihigh = imid - 1;
815 813 else if (i > 0)
816 814 ilow = ++imid;
817 815 else {
818 816 ilow = imid + 1;
819 817 break;
820 818 }
821 819 }
822 820 /* be careful about case where searchterm is after last in this block */
823 821 imid = (ilow) ? ilow - 1 : 0;
824 822
825 823 /* fetch the appropriate logical block if not in core */
826 824 read_logblock(invcntl, imid);
827 825
828 826 srch_ext:
829 827 /* now find the term in this block. tricky this */
830 828 intptr = (unsigned long *)invcntl->logblk;
831 829
832 830 ilow = 0;
833 831 ihigh = *intptr - 1;
834 832 intptr += 3;
835 833 num = 0;
836 834 while (ilow <= ihigh) {
837 835 imid = (ilow + ihigh) / 2;
838 836 entryptr = (ENTRY *)intptr + imid;
839 837 i = strncmp(searchterm, (invcntl->logblk + entryptr->offset),
840 838 (int)entryptr->size);
841 839 if (i == 0)
842 840 i = strlen(searchterm) - entryptr->size;
843 841 if (i < 0)
844 842 ihigh = imid - 1;
845 843 else if (i > 0)
846 844 ilow = ++imid;
847 845 else {
848 846 num = entryptr->post;
849 847 break;
850 848 }
851 849 }
852 850 /* be careful about case where searchterm is after last in this block */
853 851 if (imid >= *(long *)invcntl->logblk) {
854 852 invcntl->keypnt = *(long *)invcntl->logblk;
855 853 invstep(invcntl);
856 854 /* note if this happens the term could be in extended block */
857 855 if (invcntl->param.startbyte <
858 856 invcntl->numblk * invcntl->param.sizeblk)
859 857 goto srch_ext;
860 858 } else
861 859 invcntl->keypnt = imid;
862 860 return (num);
863 861 }
864 862
865 863 #if DEBUG
866 864
867 865 /* invdump dumps the block the term parameter is in */
868 866 void
869 867 invdump(INVCONTROL *invcntl, char *term)
870 868 {
871 869 long i, j, n, *longptr;
872 870 ENTRY * entryptr;
873 871 char temp[512], *ptr;
874 872
875 873 /* dump superindex if term is "-" */
876 874 if (*term == '-') {
877 875 j = atoi(term + 1);
878 876 longptr = (long *)invcntl->iindex;
879 877 n = *longptr++;
880 878 (void) printf("Superindex dump, num blocks=%ld\n", n);
881 879 longptr += j;
882 880 while ((longptr <= ((long *)invcntl->iindex) + n) &&
883 881 invbreak == 0) {
884 882 (void) printf("%2ld %6ld %s\n", j++, *longptr,
885 883 invcntl->iindex + *longptr);
886 884 longptr++;
887 885 }
888 886 return;
889 887 } else if (*term == '#') {
890 888 j = atoi(term + 1);
891 889 /* fetch the appropriate logical block */
892 890 read_logblock(invcntl, j);
893 891 } else
894 892 i = abs((int)invfind(invcntl, term));
895 893 longptr = (long *)invcntl->logblk;
896 894 n = *longptr++;
897 895 (void) printf("Entry term to invdump=%s, postings=%ld, "
898 896 "forward ptr=%ld, back ptr=%ld\n", term, i, *(longptr),
899 897 *(longptr + 1));
900 898 entryptr = (ENTRY *)(invcntl->logblk + 12);
901 899 (void) printf("%ld terms in this block, block=%ld\n", n,
902 900 invcntl->numblk);
903 901 (void) printf("\tterm\t\t\tposts\tsize\toffset\tspace\t1st word\n");
904 902 for (j = 0; j < n && invbreak == 0; j++) {
905 903 ptr = invcntl->logblk + entryptr->offset;
906 904 (void) strncpy(temp, ptr, (int)entryptr->size);
907 905 temp[entryptr->size] = '\0';
908 906 ptr += (sizeof (long) *
909 907 (long)((entryptr->size +
910 908 (sizeof (long) - 1)) / sizeof (long)));
911 909 (void) printf("%2ld %-24s\t%5ld\t%3d\t%d\t%d\t%ld\n", j, temp,
912 910 entryptr->post, entryptr->size, entryptr->offset,
913 911 entryptr->space, *(long *)ptr);
914 912 entryptr++;
915 913 }
916 914 }
917 915 #endif
918 916
919 917 static int
920 918 boolready(void)
921 919 {
922 920 numitems = 0;
923 921 if (item1 != NULL)
924 922 free(item1);
925 923 setsize1 = SETINC;
926 924 if ((item1 = (POSTING *)malloc(SETINC * sizeof (POSTING))) == NULL) {
927 925 invcannotalloc(SETINC);
928 926 return (-1);
929 927 }
930 928 if (item2 != NULL)
931 929 free(item2);
932 930 setsize2 = SETINC;
933 931 if ((item2 = (POSTING *)malloc(SETINC * sizeof (POSTING))) == NULL) {
934 932 invcannotalloc(SETINC);
935 933 return (-1);
936 934 }
937 935 item = item1;
938 936 enditem = item;
939 937 return (0);
940 938 }
↓ open down ↓ |
898 lines elided |
↑ open up ↑ |
941 939
942 940 void
943 941 boolclear(void)
944 942 {
945 943 numitems = 0;
946 944 item = item1;
947 945 enditem = item;
948 946 }
949 947
950 948 POSTING *
951 -boolfile(INVCONTROL *invcntl, long *num, int bool)
949 +boolfile(INVCONTROL *invcntl, long *num, int op)
952 950 {
953 951 ENTRY *entryptr;
954 952 FILE *file;
955 953 char *ptr;
956 954 unsigned long *ptr2;
957 955 POSTING *newitem;
958 956 POSTING posting;
959 957 unsigned u;
960 958 POSTING *newsetp, *set1p;
961 959 long newsetc, set1c, set2c;
962 960
963 961 entryptr = (ENTRY *) (invcntl->logblk + 12) + invcntl->keypnt;
964 962 ptr = invcntl->logblk + entryptr->offset;
965 963 ptr2 = ((unsigned long *)ptr) +
966 964 (entryptr->size + (sizeof (long) - 1)) / sizeof (long);
967 965 *num = entryptr->post;
968 - switch (bool) {
966 + switch (op) {
969 967 case OR:
970 968 case NOT:
971 969 if (*num == 0) {
972 970 *num = numitems;
973 971 return (item);
974 972 }
975 973 }
976 974 /* make room for the new set */
977 975 u = 0;
978 - switch (bool) {
976 + switch (op) {
979 977 case AND:
980 978 case NOT:
981 979 newsetp = set1p = item;
982 980 break;
983 981
984 982 case OR:
985 983 u = enditem - item;
986 984 /* FALLTHROUGH */
987 985 case REVERSENOT:
988 986 u += *num;
989 987 if (item == item2) {
990 988 if (u > setsize1) {
991 989 u += SETINC;
992 990 if ((item1 = (POSTING *) realloc(item1,
993 991 u * sizeof (POSTING))) == NULL) {
994 992 goto cannotalloc;
995 993 }
996 994 setsize1 = u;
997 995 }
998 996 newitem = item1;
999 997 } else {
1000 998 if (u > setsize2) {
1001 999 u += SETINC;
1002 1000 if ((item2 = (POSTING *)realloc(item2,
1003 1001 u * sizeof (POSTING))) == NULL) {
1004 1002 cannotalloc:
1005 1003 invcannotalloc(u * sizeof (POSTING));
1006 1004 (void) boolready();
1007 1005 *num = -1;
1008 1006 return (NULL);
1009 1007 }
1010 1008 setsize2 = u;
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
1011 1009 }
1012 1010 newitem = item2;
1013 1011 }
1014 1012 set1p = item;
1015 1013 newsetp = newitem;
1016 1014 }
1017 1015 file = invcntl->postfile;
1018 1016 (void) fseek(file, (long)*ptr2, SEEK_SET);
1019 1017 read_next_posting(invcntl, &posting);
1020 1018 newsetc = 0;
1021 - switch (bool) {
1019 + switch (op) {
1022 1020 case OR:
1023 1021 /* while something in both sets */
1024 1022 set1p = item;
1025 1023 newsetp = newitem;
1026 1024 for (set1c = 0, set2c = 0;
1027 1025 set1c < numitems && set2c < *num; newsetc++) {
1028 1026 if (set1p->lineoffset < posting.lineoffset) {
1029 1027 *newsetp++ = *set1p++;
1030 1028 set1c++;
1031 1029 } else if (set1p->lineoffset > posting.lineoffset) {
1032 1030 *newsetp++ = posting;
1033 1031 read_next_posting(invcntl, &posting);
1034 1032 set2c++;
1035 1033 } else if (set1p->type < posting.type) {
1036 1034 *newsetp++ = *set1p++;
1037 1035 set1c++;
1038 1036 } else if (set1p->type > posting.type) {
1039 1037 *newsetp++ = posting;
1040 1038 read_next_posting(invcntl, &posting);
1041 1039 set2c++;
1042 1040 } else { /* identical postings */
1043 1041 *newsetp++ = *set1p++;
1044 1042 set1c++;
1045 1043 read_next_posting(invcntl, &posting);
1046 1044 set2c++;
1047 1045 }
1048 1046 }
1049 1047 /* find out what ran out and move the rest in */
1050 1048 if (set1c < numitems) {
1051 1049 newsetc += numitems - set1c;
1052 1050 while (set1c++ < numitems) {
1053 1051 *newsetp++ = *set1p++;
1054 1052 }
1055 1053 } else {
1056 1054 while (set2c++ < *num) {
1057 1055 *newsetp++ = posting;
1058 1056 newsetc++;
1059 1057 read_next_posting(invcntl, &posting);
1060 1058 }
1061 1059 }
1062 1060 item = newitem;
1063 1061 break; /* end of OR */
1064 1062 #if 0
1065 1063 case AND:
1066 1064 set1c = 0;
1067 1065 set2c = 0;
1068 1066 while (set1c < numitems && set2c < *num) {
1069 1067 if (set1p->lineoffset < posting.lineoffset) {
1070 1068 set1p++;
1071 1069 set1c++;
1072 1070 } else if (set1p->lineoffset > posting.lineoffset) {
1073 1071 read_next_posting(invcntl, &posting);
1074 1072 set2c++;
1075 1073 } else if (set1p->type < posting.type) {
1076 1074 *set1p++;
1077 1075 set1c++;
1078 1076 } else if (set1p->type > posting.type) {
1079 1077 read_next_posting(invcntl, &posting);
1080 1078 set2c++;
1081 1079 } else { /* identical postings */
1082 1080 *newsetp++ = *set1p++;
1083 1081 newsetc++;
1084 1082 set1c++;
1085 1083 read_next_posting(invcntl, &posting);
1086 1084 set2c++;
1087 1085 }
1088 1086 }
1089 1087 break; /* end of AND */
1090 1088
1091 1089 case NOT:
1092 1090 set1c = 0;
1093 1091 set2c = 0;
1094 1092 while (set1c < numitems && set2c < *num) {
1095 1093 if (set1p->lineoffset < posting.lineoffset) {
1096 1094 *newsetp++ = *set1p++;
1097 1095 newsetc++;
1098 1096 set1c++;
1099 1097 } else if (set1p->lineoffset > posting.lineoffset) {
1100 1098 read_next_posting(invcntl, &posting);
1101 1099 set2c++;
1102 1100 } else if (set1p->type < posting.type) {
1103 1101 *newsetp++ = *set1p++;
1104 1102 newsetc++;
1105 1103 set1c++;
1106 1104 } else if (set1p->type > posting.type) {
1107 1105 read_next_posting(invcntl, &posting);
1108 1106 set2c++;
1109 1107 } else { /* identical postings */
1110 1108 set1c++;
1111 1109 set1p++;
1112 1110 read_next_posting(invcntl, &posting);
1113 1111 set2c++;
1114 1112 }
1115 1113 }
1116 1114 newsetc += numitems - set1c;
1117 1115 while (set1c++ < numitems) {
1118 1116 *newsetp++ = *set1p++;
1119 1117 }
1120 1118 break; /* end of NOT */
1121 1119
1122 1120 case REVERSENOT: /* core NOT incoming set */
1123 1121 set1c = 0;
1124 1122 set2c = 0;
1125 1123 while (set1c < numitems && set2c < *num) {
1126 1124 if (set1p->lineoffset < posting.lineoffset) {
1127 1125 set1p++;
1128 1126 set1c++;
1129 1127 } else if (set1p->lineoffset > posting.lineoffset) {
1130 1128 *newsetp++ = posting;
1131 1129 read_next_posting(invcntl, &posting);
1132 1130 set2c++;
1133 1131 } else if (set1p->type < posting.type) {
1134 1132 set1p++;
1135 1133 set1c++;
1136 1134 } else if (set1p->type > posting.type) {
1137 1135 *newsetp++ = posting;
1138 1136 read_next_posting(invcntl, &posting);
1139 1137 set2c++;
1140 1138 } else { /* identical postings */
1141 1139 set1c++;
1142 1140 set1p++;
1143 1141 read_next_posting(invcntl, &posting);
1144 1142 set2c++;
1145 1143 }
1146 1144 }
1147 1145 while (set2c++ < *num) {
1148 1146 *newsetp++ = posting;
1149 1147 newsetc++;
1150 1148 read_next_posting(invcntl, &posting);
1151 1149 }
1152 1150 item = newitem;
1153 1151 break; /* end of REVERSENOT */
1154 1152 #endif
1155 1153 }
1156 1154 numitems = newsetc;
1157 1155 *num = newsetc;
1158 1156 enditem = (POSTING *)newsetp;
1159 1157 return ((POSTING *)item);
1160 1158 }
1161 1159
1162 1160 #if 0
1163 1161 POSTING *
1164 1162 boolsave(int clear)
1165 1163 {
1166 1164 int i;
1167 1165 POSTING *ptr;
1168 1166 POSTING *oldstuff, *newstuff;
1169 1167
1170 1168 if (numitems == 0) {
1171 1169 if (clear)
1172 1170 boolclear();
1173 1171 return (NULL);
1174 1172 }
1175 1173 /*
1176 1174 * if clear then give them what we have and use (void)
1177 1175 * boolready to realloc
1178 1176 */
1179 1177 if (clear) {
1180 1178 ptr = item;
1181 1179 /* free up the space we didn't give them */
1182 1180 if (item == item1)
1183 1181 item1 = NULL;
1184 1182 else
1185 1183 item2 = NULL;
1186 1184 (void) boolready();
1187 1185 return (ptr);
1188 1186 }
1189 1187 i = (enditem - item) * sizeof (POSTING) + 100;
1190 1188 if ((ptr = (POSTING *)malloc(i))r == NULL) {
1191 1189 invcannotalloc(i);
1192 1190 return (ptr);
1193 1191 }
1194 1192 /* move present set into place */
1195 1193 oldstuff = item;
1196 1194 newstuff = ptr;
1197 1195 while (oldstuff < enditem)
1198 1196 *newstuff++ = *oldstuff++;
1199 1197 return (ptr);
1200 1198 }
1201 1199 #endif
1202 1200
1203 1201 static void
1204 1202 invcannotalloc(size_t n)
1205 1203 {
1206 1204 (void) fprintf(stderr, "%s: cannot allocate %u bytes\n", argv0, n);
1207 1205 }
1208 1206
1209 1207 static void
1210 1208 invcannotopen(char *file)
1211 1209 {
1212 1210 (void) fprintf(stderr, "%s: cannot open file %s\n", argv0, file);
1213 1211 }
1214 1212
1215 1213 static void
1216 1214 invcannotwrite(char *file)
1217 1215 {
1218 1216 (void) perror(argv0); /* must be first to preserve errno */
1219 1217 (void) fprintf(stderr, "%s: write to file %s failed\n", argv0, file);
1220 1218 }
↓ open down ↓ |
189 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX