read.c

Go to the documentation of this file.
00001 /* This file contains the heart of the mechanism used to read (and write)
00002  * files.  Read and write requests are split up into chunks that do not cross
00003  * block boundaries.  Each chunk is then processed in turn.  Reads on special
00004  * files are also detected and handled.
00005  *
00006  * The entry points into this file are
00007  *   do_read:    perform the READ system call by calling read_write
00008  *   read_write: actually do the work of READ and WRITE
00009  *   read_map:   given an inode and file position, look up its zone number
00010  *   rd_indir:   read an entry in an indirect block 
00011  *   read_ahead: manage the block read ahead business
00012  */
00013 
00014 #include "fs.h"
00015 #include <fcntl.h>
00016 #include <unistd.h>
00017 #include <minix/com.h>
00018 #include "buf.h"
00019 #include "file.h"
00020 #include "fproc.h"
00021 #include "inode.h"
00022 #include "param.h"
00023 #include "super.h"
00024 
00025 FORWARD _PROTOTYPE( int rw_chunk, (struct inode *rip, off_t position,
00026         unsigned off, int chunk, unsigned left, int rw_flag,
00027         char *buff, int seg, int usr, int block_size, int *completed));
00028 
00029 /*===========================================================================*
00030  *                              do_read                                      *
00031  *===========================================================================*/
00032 PUBLIC int do_read()
00033 {
00034   return(read_write(READING));
00035 }
00036 
00037 /*===========================================================================*
00038  *                              read_write                                   *
00039  *===========================================================================*/
00040 PUBLIC int read_write(rw_flag)
00041 int rw_flag;                    /* READING or WRITING */
00042 {
00043 /* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */
00044 
00045   register struct inode *rip;
00046   register struct filp *f;
00047   off_t bytes_left, f_size, position;
00048   unsigned int off, cum_io;
00049   int op, oflags, r, chunk, usr, seg, block_spec, char_spec;
00050   int regular, partial_pipe = 0, partial_cnt = 0;
00051   mode_t mode_word;
00052   struct filp *wf;
00053   int block_size;
00054   int completed, r2 = OK;
00055   phys_bytes p;
00056 
00057   /* PM loads segments by putting funny things in other bits of the
00058    * message, indicated by a high bit in fd.
00059    */
00060   if (who_e == PM_PROC_NR && (m_in.fd & _PM_SEG_FLAG)) {
00061         seg = (int) m_in.m1_p2;
00062         usr = (int) m_in.m1_p3;
00063         m_in.fd &= ~(_PM_SEG_FLAG);     /* get rid of flag bit */
00064   } else {
00065         usr = who_e;            /* normal case */
00066         seg = D;
00067   }
00068 
00069   /* If the file descriptor is valid, get the inode, size and mode. */
00070   if (m_in.nbytes < 0) return(EINVAL);
00071   if ((f = get_filp(m_in.fd)) == NIL_FILP) return(err_code);
00072   if (((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0) {
00073         return(f->filp_mode == FILP_CLOSED ? EIO : EBADF);
00074   }
00075   if (m_in.nbytes == 0)
00076          return(0);     /* so char special files need not check for 0*/
00077 
00078   /* check if user process has the memory it needs.
00079    * if not, copying will fail later.
00080    * do this after 0-check above because umap doesn't want to map 0 bytes.
00081    */
00082   if ((r = sys_umap(usr, seg, (vir_bytes) m_in.buffer, m_in.nbytes, &p)) != OK) {
00083         printf("FS: read_write: umap failed for process %d\n", usr);
00084         return r;
00085   }
00086   position = f->filp_pos;
00087   oflags = f->filp_flags;
00088   rip = f->filp_ino;
00089   f_size = rip->i_size;
00090   r = OK;
00091   if (rip->i_pipe == I_PIPE) {
00092         /* fp->fp_cum_io_partial is only nonzero when doing partial writes */
00093         cum_io = fp->fp_cum_io_partial; 
00094   } else {
00095         cum_io = 0;
00096   }
00097   op = (rw_flag == READING ? DEV_READ : DEV_WRITE);
00098   mode_word = rip->i_mode & I_TYPE;
00099   regular = mode_word == I_REGULAR || mode_word == I_NAMED_PIPE;
00100 
00101   if ((char_spec = (mode_word == I_CHAR_SPECIAL ? 1 : 0))) {
00102         if (rip->i_zone[0] == NO_DEV)
00103                 panic(__FILE__,"read_write tries to read from "
00104                         "character device NO_DEV", NO_NUM);
00105         block_size = get_block_size(rip->i_zone[0]);
00106   }
00107   if ((block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0))) {
00108         f_size = ULONG_MAX;
00109         if (rip->i_zone[0] == NO_DEV)
00110                 panic(__FILE__,"read_write tries to read from "
00111                 " block device NO_DEV", NO_NUM);
00112         block_size = get_block_size(rip->i_zone[0]);
00113   }
00114 
00115   if (!char_spec && !block_spec)
00116         block_size = rip->i_sp->s_block_size;
00117 
00118   rdwt_err = OK;                /* set to EIO if disk error occurs */
00119 
00120   /* Check for character special files. */
00121   if (char_spec) {
00122         dev_t dev;
00123         dev = (dev_t) rip->i_zone[0];
00124         r = dev_io(op, dev, usr, m_in.buffer, position, m_in.nbytes, oflags);
00125         if (r >= 0) {
00126                 cum_io = r;
00127                 position += r;
00128                 r = OK;
00129         }
00130   } else {
00131         if (rw_flag == WRITING && block_spec == 0) {
00132                 /* Check in advance to see if file will grow too big. */
00133                 if (position > rip->i_sp->s_max_size - m_in.nbytes) 
00134                         return(EFBIG);
00135 
00136                 /* Check for O_APPEND flag. */
00137                 if (oflags & O_APPEND) position = f_size;
00138 
00139                 /* Clear the zone containing present EOF if hole about
00140                  * to be created.  This is necessary because all unwritten
00141                  * blocks prior to the EOF must read as zeros.
00142                  */
00143                 if (position > f_size) clear_zone(rip, f_size, 0);
00144         }
00145 
00146         /* Pipes are a little different.  Check. */
00147         if (rip->i_pipe == I_PIPE) {
00148                r = pipe_check(rip, rw_flag, oflags,
00149                         m_in.nbytes, position, &partial_cnt, 0);
00150                if (r <= 0) return(r);
00151         }
00152 
00153         if (partial_cnt > 0) partial_pipe = 1;
00154 
00155         /* Split the transfer into chunks that don't span two blocks. */
00156         while (m_in.nbytes != 0) {
00157 
00158                 off = (unsigned int) (position % block_size);/* offset in blk*/
00159                 if (partial_pipe) {  /* pipes only */
00160                         chunk = MIN(partial_cnt, block_size - off);
00161                 } else
00162                         chunk = MIN(m_in.nbytes, block_size - off);
00163                 if (chunk < 0) chunk = block_size - off;
00164 
00165                 if (rw_flag == READING) {
00166                         bytes_left = f_size - position;
00167                         if (position >= f_size) break;  /* we are beyond EOF */
00168                         if (chunk > bytes_left) chunk = (int) bytes_left;
00169                 }
00170 
00171                 /* Read or write 'chunk' bytes. */
00172                 r = rw_chunk(rip, position, off, chunk, (unsigned) m_in.nbytes,
00173                              rw_flag, m_in.buffer, seg, usr, block_size, &completed);
00174 
00175                 if (r != OK) break;     /* EOF reached */
00176                 if (rdwt_err < 0) break;
00177 
00178                 /* Update counters and pointers. */
00179                 m_in.buffer += chunk;   /* user buffer address */
00180                 m_in.nbytes -= chunk;   /* bytes yet to be read */
00181                 cum_io += chunk;        /* bytes read so far */
00182                 position += chunk;      /* position within the file */
00183 
00184                 if (partial_pipe) {
00185                         partial_cnt -= chunk;
00186                         if (partial_cnt <= 0)  break;
00187                 }
00188         }
00189   }
00190 
00191   /* On write, update file size and access time. */
00192   if (rw_flag == WRITING) {
00193         if (regular || mode_word == I_DIRECTORY) {
00194                 if (position > f_size) rip->i_size = position;
00195         }
00196   } else {
00197         if (rip->i_pipe == I_PIPE) {
00198                 if ( position >= rip->i_size) {
00199                         /* Reset pipe pointers. */
00200                         rip->i_size = 0;        /* no data left */
00201                         position = 0;           /* reset reader(s) */
00202                         wf = find_filp(rip, W_BIT);
00203                         if (wf != NIL_FILP) wf->filp_pos = 0;
00204                 }
00205         }
00206   }
00207   f->filp_pos = position;
00208 
00209   /* Check to see if read-ahead is called for, and if so, set it up. */
00210   if (rw_flag == READING && rip->i_seek == NO_SEEK && position % block_size== 0
00211                 && (regular || mode_word == I_DIRECTORY)) {
00212         rdahed_inode = rip;
00213         rdahedpos = position;
00214   }
00215   rip->i_seek = NO_SEEK;
00216 
00217   if (rdwt_err != OK) r = rdwt_err;     /* check for disk error */
00218   if (rdwt_err == END_OF_FILE) r = OK;
00219 
00220   /* if user-space copying failed, read/write failed. */
00221   if (r == OK && r2 != OK) {
00222         r = r2;
00223   }
00224   if (r == OK) {
00225         if (rw_flag == READING) rip->i_update |= ATIME;
00226         if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME;
00227         rip->i_dirt = DIRTY;            /* inode is thus now dirty */
00228         if (partial_pipe) {
00229                 partial_pipe = 0;
00230                         /* partial write on pipe with */
00231                 /* O_NONBLOCK, return write count */
00232                 if (!(oflags & O_NONBLOCK)) {
00233                         fp->fp_cum_io_partial = cum_io;
00234                         suspend(XPIPE);   /* partial write on pipe with */
00235                         return(SUSPEND);  /* nbyte > PIPE_SIZE - non-atomic */
00236                 }
00237         }
00238         fp->fp_cum_io_partial = 0;
00239         return(cum_io);
00240   }
00241   return(r);
00242 }
00243 
00244 /*===========================================================================*
00245  *                              rw_chunk                                     *
00246  *===========================================================================*/
00247 PRIVATE int rw_chunk(rip, position, off, chunk, left, rw_flag, buff,
00248  seg, usr, block_size, completed)
00249 register struct inode *rip;     /* pointer to inode for file to be rd/wr */
00250 off_t position;                 /* position within file to read or write */
00251 unsigned off;                   /* off within the current block */
00252 int chunk;                      /* number of bytes to read or write */
00253 unsigned left;                  /* max number of bytes wanted after position */
00254 int rw_flag;                    /* READING or WRITING */
00255 char *buff;                     /* virtual address of the user buffer */
00256 int seg;                        /* T or D segment in user space */
00257 int usr;                        /* which user process */
00258 int block_size;                 /* block size of FS operating on */
00259 int *completed;                 /* number of bytes copied */
00260 {
00261 /* Read or write (part of) a block. */
00262 
00263   register struct buf *bp;
00264   register int r = OK;
00265   int n, block_spec;
00266   block_t b;
00267   dev_t dev;
00268 
00269   *completed = 0;
00270 
00271   block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
00272   if (block_spec) {
00273         b = position/block_size;
00274         dev = (dev_t) rip->i_zone[0];
00275   } else {
00276         b = read_map(rip, position);
00277         dev = rip->i_dev;
00278   }
00279 
00280   if (!block_spec && b == NO_BLOCK) {
00281         if (rw_flag == READING) {
00282                 /* Reading from a nonexistent block.  Must read as all zeros.*/
00283                 bp = get_block(NO_DEV, NO_BLOCK, NORMAL);    /* get a buffer */
00284                 zero_block(bp);
00285         } else {
00286                 /* Writing to a nonexistent block. Create and enter in inode.*/
00287                 if ((bp= new_block(rip, position)) == NIL_BUF)return(err_code);
00288         }
00289   } else if (rw_flag == READING) {
00290         /* Read and read ahead if convenient. */
00291         bp = rahead(rip, b, position, left);
00292   } else {
00293         /* Normally an existing block to be partially overwritten is first read
00294          * in.  However, a full block need not be read in.  If it is already in
00295          * the cache, acquire it, otherwise just acquire a free buffer.
00296          */
00297         n = (chunk == block_size ? NO_READ : NORMAL);
00298         if (!block_spec && off == 0 && position >= rip->i_size) n = NO_READ;
00299         bp = get_block(dev, b, n);
00300   }
00301 
00302   /* In all cases, bp now points to a valid buffer. */
00303   if (bp == NIL_BUF) {
00304         panic(__FILE__,"bp not valid in rw_chunk, this can't happen", NO_NUM);
00305   }
00306   if (rw_flag == WRITING && chunk != block_size && !block_spec &&
00307                                         position >= rip->i_size && off == 0) {
00308         zero_block(bp);
00309   }
00310 
00311   if (rw_flag == READING) {
00312         /* Copy a chunk from the block buffer to user space. */
00313         r = sys_vircopy(FS_PROC_NR, D, (phys_bytes) (bp->b_data+off),
00314                         usr, seg, (phys_bytes) buff,
00315                         (phys_bytes) chunk);
00316   } else {
00317         /* Copy a chunk from user space to the block buffer. */
00318         r = sys_vircopy(usr, seg, (phys_bytes) buff,
00319                         FS_PROC_NR, D, (phys_bytes) (bp->b_data+off),
00320                         (phys_bytes) chunk);
00321         bp->b_dirt = DIRTY;
00322   }
00323   n = (off + chunk == block_size ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK);
00324   put_block(bp, n);
00325 
00326   return(r);
00327 }
00328 
00329 
00330 /*===========================================================================*
00331  *                              read_map                                     *
00332  *===========================================================================*/
00333 PUBLIC block_t read_map(rip, position)
00334 register struct inode *rip;     /* ptr to inode to map from */
00335 off_t position;                 /* position in file whose blk wanted */
00336 {
00337 /* Given an inode and a position within the corresponding file, locate the
00338  * block (not zone) number in which that position is to be found and return it.
00339  */
00340 
00341   register struct buf *bp;
00342   register zone_t z;
00343   int scale, boff, dzones, nr_indirects, index, zind, ex;
00344   block_t b;
00345   long excess, zone, block_pos;
00346   
00347   scale = rip->i_sp->s_log_zone_size;   /* for block-zone conversion */
00348   block_pos = position/rip->i_sp->s_block_size; /* relative blk # in file */
00349   zone = block_pos >> scale;    /* position's zone */
00350   boff = (int) (block_pos - (zone << scale) ); /* relative blk # within zone */
00351   dzones = rip->i_ndzones;
00352   nr_indirects = rip->i_nindirs;
00353 
00354   /* Is 'position' to be found in the inode itself? */
00355   if (zone < dzones) {
00356         zind = (int) zone;      /* index should be an int */
00357         z = rip->i_zone[zind];
00358         if (z == NO_ZONE) return(NO_BLOCK);
00359         b = ((block_t) z << scale) + boff;
00360         return(b);
00361   }
00362 
00363   /* It is not in the inode, so it must be single or double indirect. */
00364   excess = zone - dzones;       /* first Vx_NR_DZONES don't count */
00365 
00366   if (excess < nr_indirects) {
00367         /* 'position' can be located via the single indirect block. */
00368         z = rip->i_zone[dzones];
00369   } else {
00370         /* 'position' can be located via the double indirect block. */
00371         if ( (z = rip->i_zone[dzones+1]) == NO_ZONE) return(NO_BLOCK);
00372         excess -= nr_indirects;                 /* single indir doesn't count*/
00373         b = (block_t) z << scale;
00374         bp = get_block(rip->i_dev, b, NORMAL);  /* get double indirect block */
00375         index = (int) (excess/nr_indirects);
00376         z = rd_indir(bp, index);                /* z= zone for single*/
00377         put_block(bp, INDIRECT_BLOCK);          /* release double ind block */
00378         excess = excess % nr_indirects;         /* index into single ind blk */
00379   }
00380 
00381   /* 'z' is zone num for single indirect block; 'excess' is index into it. */
00382   if (z == NO_ZONE) return(NO_BLOCK);
00383   b = (block_t) z << scale;                     /* b is blk # for single ind */
00384   bp = get_block(rip->i_dev, b, NORMAL);        /* get single indirect block */
00385   ex = (int) excess;                            /* need an integer */
00386   z = rd_indir(bp, ex);                         /* get block pointed to */
00387   put_block(bp, INDIRECT_BLOCK);                /* release single indir blk */
00388   if (z == NO_ZONE) return(NO_BLOCK);
00389   b = ((block_t) z << scale) + boff;
00390   return(b);
00391 }
00392 
00393 /*===========================================================================*
00394  *                              rd_indir                                     *
00395  *===========================================================================*/
00396 PUBLIC zone_t rd_indir(bp, index)
00397 struct buf *bp;                 /* pointer to indirect block */
00398 int index;                      /* index into *bp */
00399 {
00400 /* Given a pointer to an indirect block, read one entry.  The reason for
00401  * making a separate routine out of this is that there are four cases:
00402  * V1 (IBM and 68000), and V2 (IBM and 68000).
00403  */
00404 
00405   struct super_block *sp;
00406   zone_t zone;                  /* V2 zones are longs (shorts in V1) */
00407 
00408   if(bp == NIL_BUF)
00409         panic(__FILE__, "rd_indir() on NIL_BUF", NO_NUM);
00410 
00411   sp = get_super(bp->b_dev);    /* need super block to find file sys type */
00412 
00413   /* read a zone from an indirect block */
00414   if (sp->s_version == V1)
00415         zone = (zone_t) conv2(sp->s_native, (int)  bp->b_v1_ind[index]);
00416   else
00417         zone = (zone_t) conv4(sp->s_native, (long) bp->b_v2_ind[index]);
00418 
00419   if (zone != NO_ZONE &&
00420                 (zone < (zone_t) sp->s_firstdatazone || zone >= sp->s_zones)) {
00421         printf("Illegal zone number %ld in indirect block, index %d\n",
00422                (long) zone, index);
00423         panic(__FILE__,"check file system", NO_NUM);
00424   }
00425   return(zone);
00426 }
00427 
00428 /*===========================================================================*
00429  *                              read_ahead                                   *
00430  *===========================================================================*/
00431 PUBLIC void read_ahead()
00432 {
00433 /* Read a block into the cache before it is needed. */
00434   int block_size;
00435   register struct inode *rip;
00436   struct buf *bp;
00437   block_t b;
00438 
00439   rip = rdahed_inode;           /* pointer to inode to read ahead from */
00440   block_size = get_block_size(rip->i_dev);
00441   rdahed_inode = NIL_INODE;     /* turn off read ahead */
00442   if ( (b = read_map(rip, rdahedpos)) == NO_BLOCK) return;      /* at EOF */
00443   bp = rahead(rip, b, rdahedpos, block_size);
00444   put_block(bp, PARTIAL_DATA_BLOCK);
00445 }
00446 
00447 /*===========================================================================*
00448  *                              rahead                                       *
00449  *===========================================================================*/
00450 PUBLIC struct buf *rahead(rip, baseblock, position, bytes_ahead)
00451 register struct inode *rip;     /* pointer to inode for file to be read */
00452 block_t baseblock;              /* block at current position */
00453 off_t position;                 /* position within file */
00454 unsigned bytes_ahead;           /* bytes beyond position for immediate use */
00455 {
00456 /* Fetch a block from the cache or the device.  If a physical read is
00457  * required, prefetch as many more blocks as convenient into the cache.
00458  * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
00459  * The device driver may decide it knows better and stop reading at a
00460  * cylinder boundary (or after an error).  Rw_scattered() puts an optional
00461  * flag on all reads to allow this.
00462  */
00463   int block_size;
00464 /* Minimum number of blocks to prefetch. */
00465 # define BLOCKS_MINIMUM         (NR_BUFS < 50 ? 18 : 32)
00466   int block_spec, scale, read_q_size;
00467   unsigned int blocks_ahead, fragment;
00468   block_t block, blocks_left;
00469   off_t ind1_pos;
00470   dev_t dev;
00471   struct buf *bp;
00472   static struct buf *read_q[NR_BUFS];
00473 
00474   block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
00475   if (block_spec) {
00476         dev = (dev_t) rip->i_zone[0];
00477   } else {
00478         dev = rip->i_dev;
00479   }
00480   block_size = get_block_size(dev);
00481 
00482   block = baseblock;
00483   bp = get_block(dev, block, PREFETCH);
00484   if (bp->b_dev != NO_DEV) return(bp);
00485 
00486   /* The best guess for the number of blocks to prefetch:  A lot.
00487    * It is impossible to tell what the device looks like, so we don't even
00488    * try to guess the geometry, but leave it to the driver.
00489    *
00490    * The floppy driver can read a full track with no rotational delay, and it
00491    * avoids reading partial tracks if it can, so handing it enough buffers to
00492    * read two tracks is perfect.  (Two, because some diskette types have
00493    * an odd number of sectors per track, so a block may span tracks.)
00494    *
00495    * The disk drivers don't try to be smart.  With todays disks it is
00496    * impossible to tell what the real geometry looks like, so it is best to
00497    * read as much as you can.  With luck the caching on the drive allows
00498    * for a little time to start the next read.
00499    *
00500    * The current solution below is a bit of a hack, it just reads blocks from
00501    * the current file position hoping that more of the file can be found.  A
00502    * better solution must look at the already available zone pointers and
00503    * indirect blocks (but don't call read_map!).
00504    */
00505 
00506   fragment = position % block_size;
00507   position -= fragment;
00508   bytes_ahead += fragment;
00509 
00510   blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
00511 
00512   if (block_spec && rip->i_size == 0) {
00513         blocks_left = NR_IOREQS;
00514   } else {
00515         blocks_left = (rip->i_size - position + block_size - 1) / block_size;
00516 
00517         /* Go for the first indirect block if we are in its neighborhood. */
00518         if (!block_spec) {
00519                 scale = rip->i_sp->s_log_zone_size;
00520                 ind1_pos = (off_t) rip->i_ndzones * (block_size << scale);
00521                 if (position <= ind1_pos && rip->i_size > ind1_pos) {
00522                         blocks_ahead++;
00523                         blocks_left++;
00524                 }
00525         }
00526   }
00527 
00528   /* No more than the maximum request. */
00529   if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;
00530 
00531   /* Read at least the minimum number of blocks, but not after a seek. */
00532   if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
00533         blocks_ahead = BLOCKS_MINIMUM;
00534 
00535   /* Can't go past end of file. */
00536   if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
00537 
00538   read_q_size = 0;
00539 
00540   /* Acquire block buffers. */
00541   for (;;) {
00542         read_q[read_q_size++] = bp;
00543 
00544         if (--blocks_ahead == 0) break;
00545 
00546         /* Don't trash the cache, leave 4 free. */
00547         if (bufs_in_use >= NR_BUFS - 4) break;
00548 
00549         block++;
00550 
00551         bp = get_block(dev, block, PREFETCH);
00552         if (bp->b_dev != NO_DEV) {
00553                 /* Oops, block already in the cache, get out. */
00554                 put_block(bp, FULL_DATA_BLOCK);
00555                 break;
00556         }
00557   }
00558   rw_scattered(dev, read_q, read_q_size, READING);
00559   return(get_block(dev, baseblock, NORMAL));
00560 }

Generated on Fri Apr 14 22:57:30 2006 for minix by  doxygen 1.4.6