1/*-
2 * Copyright (c) 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#if defined(LIBC_SCCS) && !defined(lint)
35static char sccsid[] = "@(#)mpool.c 8.5 (Berkeley) 7/26/94";
36#endif /* LIBC_SCCS and not lint */
37
38#include <sys/param.h>
39#include <bsd-queue.h>
40
41#include <errno.h>
42#include <stdio.h>
43#include <stdlib.h>
44#include <string.h>
45
46#include <db.h>
47
48#define __MPOOLINTERFACE_PRIVATE
49#include <mpool.h>
50
51static BKT *mpool_bkt __P((MPOOL *));
52static BKT *mpool_look __P((MPOOL *, pgno_t));
53static int mpool_write __P((MPOOL *, BKT *));
54
55/*
56 * mpool_open --
57 * Initialize a memory pool.
58 */
59MPOOL *
60mpool_open(key, fd, fvtable, pagesize, maxcache)
61 void *key;
62 virt_fd_t fd;
63 const FILEVTABLE * fvtable;
64 pgno_t pagesize, maxcache;
65{
66 MPOOL *mp;
67 int entry;
68
69 /* Allocate and initialize the MPOOL cookie. */
70 if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
71 return (NULL);
72 CIRCLEQ_INIT(&mp->lqh);
73 for (entry = 0; entry < HASHSIZE; ++entry)
74 CIRCLEQ_INIT(&mp->hqh[entry]);
75 mp->maxcache = maxcache;
76 mp->fvtable = fvtable;
77 off_t file_size = mp->fvtable->lseek(fd, 0, SEEK_END);
78 if (file_size == (off_t)-1)
79 return (NULL);
80 mp->npages = file_size / pagesize;
81 mp->pagesize = pagesize;
82 mp->fd = fd;
83 return (mp);
84}
85
86/*
87 * mpool_filter --
88 * Initialize input/output filters.
89 */
90void
91mpool_filter(mp, pgin, pgout, pgcookie)
92 MPOOL *mp;
93 void (*pgin) __P((void *, pgno_t, void *));
94 void (*pgout) __P((void *, pgno_t, void *));
95 void *pgcookie;
96{
97 mp->pgin = pgin;
98 mp->pgout = pgout;
99 mp->pgcookie = pgcookie;
100}
101
102/*
103 * mpool_new --
104 * Get a new page of memory.
105 */
106void *
107mpool_new(mp, pgnoaddr)
108 MPOOL *mp;
109 pgno_t *pgnoaddr;
110{
111 struct _hqh *head;
112 BKT *bp;
113
114 if (mp->npages == MAX_PAGE_NUMBER) {
115 mpool_error("mpool_new: page allocation overflow.\n");
116 abort();
117 }
118#ifdef STATISTICS
119 ++mp->pagenew;
120#endif
121 /*
122 * Get a BKT from the cache. Assign a new page number, attach
123 * it to the head of the hash chain, the tail of the lru chain,
124 * and return.
125 */
126 if ((bp = mpool_bkt(mp)) == NULL)
127 return (NULL);
128 *pgnoaddr = bp->pgno = mp->npages++;
129 bp->flags = MPOOL_PINNED;
130
131 head = &mp->hqh[HASHKEY(bp->pgno)];
132 CIRCLEQ_INSERT_HEAD(head, bp, hq);
133 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
134 return (bp->page);
135}
136
137/*
138 * mpool_get
139 * Get a page.
140 */
141void *
142mpool_get(mp, pgno, flags)
143 MPOOL *mp;
144 pgno_t pgno;
145 u_int flags; /* XXX not used? */
146{
147 struct _hqh *head;
148 BKT *bp;
149 off_t off;
150 int nr;
151
152 /* Check for attempt to retrieve a non-existent page. */
153 if (pgno >= mp->npages) {
154 errno = EINVAL;
155 return (NULL);
156 }
157
158#ifdef STATISTICS
159 ++mp->pageget;
160#endif
161
162 /* Check for a page that is cached. */
163 if ((bp = mpool_look(mp, pgno)) != NULL) {
164#ifdef DEBUG
165 if (bp->flags & MPOOL_PINNED) {
166 mpool_error(
167 "mpool_get: page %d already pinned\n", bp->pgno);
168 abort();
169 }
170#endif
171 /*
172 * Move the page to the head of the hash chain and the tail
173 * of the lru chain.
174 */
175 head = &mp->hqh[HASHKEY(bp->pgno)];
176 CIRCLEQ_REMOVE(head, bp, hq);
177 CIRCLEQ_INSERT_HEAD(head, bp, hq);
178 CIRCLEQ_REMOVE(&mp->lqh, bp, q);
179 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
180
181 /* Return a pinned page. */
182 bp->flags |= MPOOL_PINNED;
183 return (bp->page);
184 }
185
186 /* Get a page from the cache. */
187 if ((bp = mpool_bkt(mp)) == NULL)
188 return (NULL);
189
190 /* Read in the contents. */
191#ifdef STATISTICS
192 ++mp->pageread;
193#endif
194 off = mp->pagesize * pgno;
195 if (mp->fvtable->lseek(mp->fd, off, SEEK_SET) != off)
196 return (NULL);
197 if ((nr = mp->fvtable->read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
198 if (nr >= 0)
199 errno = EFTYPE;
200 return (NULL);
201 }
202
203 /* Set the page number, pin the page. */
204 bp->pgno = pgno;
205 bp->flags = MPOOL_PINNED;
206
207 /*
208 * Add the page to the head of the hash chain and the tail
209 * of the lru chain.
210 */
211 head = &mp->hqh[HASHKEY(bp->pgno)];
212 CIRCLEQ_INSERT_HEAD(head, bp, hq);
213 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
214
215 /* Run through the user's filter. */
216 if (mp->pgin != NULL)
217 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
218
219 return (bp->page);
220}
221
222/*
223 * mpool_put
224 * Return a page.
225 */
226int
227mpool_put(mp, page, flags)
228 MPOOL *mp;
229 void *page;
230 u_int flags;
231{
232 BKT *bp;
233
234#ifdef STATISTICS
235 ++mp->pageput;
236#endif
237 bp = (BKT *)((char *)page - sizeof(BKT));
238#ifdef DEBUG
239 if (!(bp->flags & MPOOL_PINNED)) {
240 mpool_error(
241 "mpool_put: page %d not pinned\n", bp->pgno);
242 abort();
243 }
244#endif
245 bp->flags &= ~MPOOL_PINNED;
246 bp->flags |= flags & MPOOL_DIRTY;
247 return (RET_SUCCESS);
248}
249
250/*
251 * mpool_close
252 * Close the buffer pool.
253 */
254int
255mpool_close(mp)
256 MPOOL *mp;
257{
258 BKT *bp;
259
260 /* Free up any space allocated to the lru pages. */
261 while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
262 CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
263 free(bp);
264 }
265
266 /* Free the MPOOL cookie. */
267 free(mp);
268 return (RET_SUCCESS);
269}
270
271/*
272 * mpool_sync
273 * Sync the pool to disk.
274 */
275int
276mpool_sync(mp)
277 MPOOL *mp;
278{
279 BKT *bp;
280
281 /* Walk the lru chain, flushing any dirty pages to disk. */
282 for (bp = mp->lqh.cqh_first;
283 bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
284 if (bp->flags & MPOOL_DIRTY &&
285 mpool_write(mp, bp) == RET_ERROR)
286 return (RET_ERROR);
287
288 /* Sync the file descriptor. */
289 return (mp->fvtable->fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
290}
291
292/*
293 * mpool_bkt
294 * Get a page from the cache (or create one).
295 */
296static BKT *
297mpool_bkt(mp)
298 MPOOL *mp;
299{
300 struct _hqh *head;
301 BKT *bp;
302
303 /* If under the max cached, always create a new page. */
304 if (mp->curcache < mp->maxcache)
305 goto new;
306
307 /*
308 * If the cache is max'd out, walk the lru list for a buffer we
309 * can flush. If we find one, write it (if necessary) and take it
310 * off any lists. If we don't find anything we grow the cache anyway.
311 * The cache never shrinks.
312 */
313 for (bp = mp->lqh.cqh_first;
314 bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
315 if (!(bp->flags & MPOOL_PINNED)) {
316 /* Flush if dirty. */
317 if (bp->flags & MPOOL_DIRTY &&
318 mpool_write(mp, bp) == RET_ERROR)
319 return (NULL);
320#ifdef STATISTICS
321 ++mp->pageflush;
322#endif
323 /* Remove from the hash and lru queues. */
324 head = &mp->hqh[HASHKEY(bp->pgno)];
325 CIRCLEQ_REMOVE(head, bp, hq);
326 CIRCLEQ_REMOVE(&mp->lqh, bp, q);
327#ifdef DEBUG
328 { void *spage;
329 spage = bp->page;
330 memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
331 bp->page = spage;
332 }
333#endif
334 return (bp);
335 }
336
337new: if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
338 return (NULL);
339#ifdef STATISTICS
340 ++mp->pagealloc;
341#endif
342#if defined(DEBUG) || defined(PURIFY)
343 memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
344#endif
345 bp->page = (char *)bp + sizeof(BKT);
346 ++mp->curcache;
347 return (bp);
348}
349
350/*
351 * mpool_write
352 * Write a page to disk.
353 */
354static int
355mpool_write(mp, bp)
356 MPOOL *mp;
357 BKT *bp;
358{
359 off_t off;
360
361#ifdef STATISTICS
362 ++mp->pagewrite;
363#endif
364
365 /* Run through the user's filter. */
366 if (mp->pgout)
367 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
368
369 off = mp->pagesize * bp->pgno;
370 if (mp->fvtable->lseek(mp->fd, off, SEEK_SET) != off)
371 return (RET_ERROR);
372 if (mp->fvtable->write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
373 return (RET_ERROR);
374
375 bp->flags &= ~MPOOL_DIRTY;
376 return (RET_SUCCESS);
377}
378
379/*
380 * mpool_look
381 * Lookup a page in the cache.
382 */
383static BKT *
384mpool_look(mp, pgno)
385 MPOOL *mp;
386 pgno_t pgno;
387{
388 struct _hqh *head;
389 BKT *bp;
390
391 head = &mp->hqh[HASHKEY(pgno)];
392 for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
393 if (bp->pgno == pgno) {
394#ifdef STATISTICS
395 ++mp->cachehit;
396#endif
397 return (bp);
398 }
399#ifdef STATISTICS
400 ++mp->cachemiss;
401#endif
402 return (NULL);
403}
404
#ifdef STATISTICS
/*
 * mpool_stat --
 *	Print cache statistics to stderr.
 *
 *	Reports the page counts, the operation counters and, when at least
 *	one lookup has happened, the hit rate.  It then lists the page
 *	number of every buffer on the lru chain, ten per line, suffixed with
 *	"d" for dirty and "P" for pinned pages.
 *
 *	Every integer argument is cast to unsigned long so that it matches
 *	its %lu conversion whatever the width of the underlying field.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	/* Skip the hit rate when there were no lookups (avoids 0/0). */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		/* Page numbers are printed as unsigned values. */
		(void)fprintf(stderr, "%s%lu", sep, (unsigned long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
453