VirtualBox

source: vbox/trunk/src/VBox/Additions/linux/sharedfolders/regops.c @ 77530

Last change on this file since 77530 was 77530, checked in by vboxsync, 6 years ago

linux/vboxsf: Use vbxsf as prefix here - part II. bugref:9172

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 52.0 KB
 
1/* $Id: regops.c 77530 2019-03-01 14:39:03Z vboxsync $ */
2/** @file
3 * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
16 * conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
29 */
30
31#include "vfsmod.h"
32#include <linux/uio.h>
33#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 32)
34# include <linux/aio.h> /* struct kiocb before 4.1 */
35#endif
36#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
37# include <linux/buffer_head.h>
38#endif
39#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
40 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
41# include <linux/writeback.h>
42#endif
43#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
44 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
45# include <linux/splice.h>
46#endif
47#include <iprt/err.h>
48
49#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18)
50# define SEEK_END 2
51#endif
52
53
54/**
55 * Called when an inode is released to unlink all handles that might possibly
56 * still be associated with it.
57 *
58 * @param pInodeInfo The inode which handles to drop.
59 */
60void vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo)
61{
62 struct sf_handle *pCur, *pNext;
63 unsigned long fSavedFlags;
64 SFLOGFLOW(("vbsf_handle_drop_chain: %p\n", pInodeInfo));
65 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
66
67 RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct sf_handle, Entry) {
68 AssertMsg((pCur->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == (SF_HANDLE_F_MAGIC | SF_HANDLE_F_ON_LIST),
69 ("%p %#x\n", pCur, pCur->fFlags));
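 /* Clear the on-list flag so that a later vbsf_handle_release() won't try to unlink the node a second time. */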
70 pCur->fFlags &= ~SF_HANDLE_F_ON_LIST;
71 RTListNodeRemove(&pCur->Entry);
72 }
73
74 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
75}
76
77
78/**
79 * Locates a handle that has all the @a fFlagsSet flags set and all the @a fFlagsClear flags clear.
80 *
81 * @returns Pointer to handle on success (retained), use vbsf_handle_release() to
82 * release it. NULL if no suitable handle was found.
83 * @param pInodeInfo The inode info to search.
84 * @param fFlagsSet The flags that must be set.
85 * @param fFlagsClear The flags that must be clear.
86 */
87struct sf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
88{
89 struct sf_handle *pCur;
90 unsigned long fSavedFlags;
91 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
92
93 RTListForEach(&pInodeInfo->HandleList, pCur, struct sf_handle, Entry) {
94 AssertMsg((pCur->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == (SF_HANDLE_F_MAGIC | SF_HANDLE_F_ON_LIST),
95 ("%p %#x\n", pCur, pCur->fFlags));
96 if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
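 /* Only retain the handle if it already had at least one reference; a count that was zero means it is being destroyed concurrently. */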
97 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
98 if (cRefs > 1) {
99 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
100 SFLOGFLOW(("vbsf_handle_find: returns %p\n", pCur));
101 return pCur;
102 }
103 /* Oops, already being closed (safe as it's only ever increased here). */
104 ASMAtomicDecU32(&pCur->cRefs);
105 }
106 }
107
108 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
109 SFLOGFLOW(("vbsf_handle_find: returns NULL!\n"));
110 return NULL;
111}
112
113
114/**
115 * Slow worker for vbsf_handle_release() that does the freeing.
116 *
117 * @returns 0 (ref count).
118 * @param pHandle The handle to release.
119 * @param sf_g The info structure for the shared folder associated
120 * with the handle.
121 * @param pszCaller The caller name (for logging failures).
122 */
123uint32_t vbsf_handle_release_slow(struct sf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
124{
125 int rc;
126 unsigned long fSavedFlags;
127
128 SFLOGFLOW(("vbsf_handle_release_slow: %p (%s)\n", pHandle, pszCaller));
129
130 /*
131 * Remove from the list.
132 */
133 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
134
135 AssertMsg((pHandle->fFlags & SF_HANDLE_F_MAGIC_MASK) == SF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags));
136 Assert(pHandle->pInodeInfo);
137 Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
138
139 if (pHandle->fFlags & SF_HANDLE_F_ON_LIST) {
140 pHandle->fFlags &= ~SF_HANDLE_F_ON_LIST;
141 RTListNodeRemove(&pHandle->Entry);
142 }
143
144 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
145
146 /*
147 * Actually destroy it.
148 */
149 rc = VbglR0SfHostReqCloseSimple(sf_g->map.root, pHandle->hHost);
150 if (RT_FAILURE(rc))
151 LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc));
152 pHandle->hHost = SHFL_HANDLE_NIL;
153 pHandle->fFlags = SF_HANDLE_F_MAGIC_DEAD;
154 kfree(pHandle);
155 return 0;
156}
157
158
159/**
160 * Appends a handle to a handle list.
161 *
162 * @param pInodeInfo The inode to add it to.
163 * @param pHandle The handle to add.
164 */
165void vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct sf_handle *pHandle)
166{
167#ifdef VBOX_STRICT
168 struct sf_handle *pCur;
169#endif
170 unsigned long fSavedFlags;
171
172 SFLOGFLOW(("vbsf_handle_append: %p (to %p)\n", pHandle, pInodeInfo));
173 AssertMsg((pHandle->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == SF_HANDLE_F_MAGIC,
174 ("%p %#x\n", pHandle, pHandle->fFlags));
175 Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
176
177 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
178
179 AssertMsg((pHandle->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == SF_HANDLE_F_MAGIC,
180 ("%p %#x\n", pHandle, pHandle->fFlags));
181#ifdef VBOX_STRICT
182 RTListForEach(&pInodeInfo->HandleList, pCur, struct sf_handle, Entry) {
183 Assert(pCur != pHandle);
184 AssertMsg((pCur->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == (SF_HANDLE_F_MAGIC | SF_HANDLE_F_ON_LIST),
185 ("%p %#x\n", pCur, pCur->fFlags));
186 }
187 pHandle->pInodeInfo = pInodeInfo;
188#endif
189
190 pHandle->fFlags |= SF_HANDLE_F_ON_LIST;
191 RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry);
192
193 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
194}
195
196
197#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
198 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
199
200/*
201 * Some pipe stuff we apparently need for 2.6.23-2.6.30.
202 */
203
204static void vbsf_free_pipebuf(struct page *kpage)
205{
206 kunmap(kpage);
207 __free_pages(kpage, 0);
208}
209
210static void *vbsf_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, int atomic)
211{
212 return 0;
213}
214
215static void vbsf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
216{
217}
218
219static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, void *map_data)
220{
221}
222
223static int vbsf_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
224{
225 return 0;
226}
227
228static void vbsf_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
229{
230 vbsf_free_pipebuf(pipe_buf->page);
231}
232
233static int vbsf_pipe_buf_confirm(struct pipe_inode_info *info, struct pipe_buffer *pipe_buf)
234{
235 return 0;
236}
237
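/* The pages we stuff into the pipe below are private kernel allocations, so
 * most of these callbacks can be no-ops; only .release has real work to do,
 * namely freeing the page again. */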
238static struct pipe_buf_operations vbsf_pipe_buf_ops = {
239 .can_merge = 0,
240 .map = vbsf_pipe_buf_map,
241 .unmap = vbsf_pipe_buf_unmap,
242 .confirm = vbsf_pipe_buf_confirm,
243 .release = vbsf_pipe_buf_release,
244 .steal = vbsf_pipe_buf_steal,
245 .get = vbsf_pipe_buf_get,
246};
247
248static int vbsf_reg_read_aux(const char *caller, struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
249 void *buf, uint32_t *nread, uint64_t pos)
250{
251 int rc = VbglR0SfRead(&g_SfClient, &sf_g->map, sf_r->Handle.hHost, pos, nread, buf, false /* already locked? */ );
252 if (RT_FAILURE(rc)) {
253 LogFunc(("VbglR0SfRead failed. caller=%s, rc=%Rrc\n", caller,
254 rc));
255 return -EPROTO;
256 }
257 return 0;
258}
259
260# define LOCK_PIPE(pipe) do { if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); } while (0)
261# define UNLOCK_PIPE(pipe) do { if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); } while (0)
262
263ssize_t vbsf_splice_read(struct file *in, loff_t * poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
264{
265 size_t bytes_remaining = len;
266 loff_t orig_offset = *poffset;
267 loff_t offset = orig_offset;
268 struct inode *inode = VBSF_GET_F_DENTRY(in)->d_inode;
269 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
270 struct vbsf_reg_info *sf_r = in->private_data;
271 ssize_t retval;
272 struct page *kpage = 0;
273 size_t nsent = 0;
274
275/** @todo rig up a FsPerf test for this code */
276 TRACE();
277 if (!S_ISREG(inode->i_mode)) {
278 LogFunc(("read from non regular file %d\n", inode->i_mode));
279 return -EINVAL;
280 }
281 if (!len) {
282 return 0;
283 }
284
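 /* Copy file data into freshly allocated pages and hang them onto the pipe
  * ring, one page per iteration, until the request is satisfied, the pipe is
  * full, EOF is reached or a signal is pending. */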
285 LOCK_PIPE(pipe);
286
287 uint32_t req_size = 0;
288 while (bytes_remaining > 0) {
289 kpage = alloc_page(GFP_KERNEL);
290 if (unlikely(kpage == NULL)) {
291 UNLOCK_PIPE(pipe);
292 return -ENOMEM;
293 }
294 req_size = 0;
295 uint32_t nread = req_size = (uint32_t) min(bytes_remaining, (size_t) PAGE_SIZE);
296 uint32_t chunk = 0;
297 void *kbuf = kmap(kpage);
298 while (chunk < req_size) {
299 retval = vbsf_reg_read_aux(__func__, sf_g, sf_r, kbuf + chunk, &nread, offset);
300 if (retval < 0)
301 goto err;
302 if (nread == 0)
303 break;
304 chunk += nread;
305 offset += nread;
306 nread = req_size - chunk;
307 }
308 if (!pipe->readers) {
309 send_sig(SIGPIPE, current, 0);
310 retval = -EPIPE;
311 goto err;
312 }
313 if (pipe->nrbufs < PIPE_BUFFERS) {
314 struct pipe_buffer *pipebuf = pipe->bufs + ((pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1));
315 pipebuf->page = kpage;
316 pipebuf->ops = &vbsf_pipe_buf_ops;
317 pipebuf->len = req_size;
318 pipebuf->offset = 0;
319 pipebuf->private = 0;
320 pipebuf->flags = 0;
321 pipe->nrbufs++;
322 nsent += req_size;
323 bytes_remaining -= req_size;
324 if (signal_pending(current))
325 break;
326 } else { /* pipe full */
327
328 if (flags & SPLICE_F_NONBLOCK) {
329 retval = -EAGAIN;
330 goto err;
331 }
332 vbsf_free_pipebuf(kpage);
333 break;
334 }
335 }
336 UNLOCK_PIPE(pipe);
337 if (!nsent && signal_pending(current))
338 return -ERESTARTSYS;
339 *poffset += nsent;
340 return offset - orig_offset;
341
342 err:
343 UNLOCK_PIPE(pipe);
344 vbsf_free_pipebuf(kpage);
345 return retval;
346}
347
348#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
349
350
351/** Companion to vbsf_lock_user_pages(). */
352DECLINLINE(void) vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty)
353{
354 while (cPages-- > 0)
355 {
356 struct page *pPage = papPages[cPages];
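 /* Don't mark reserved pages (e.g. the zero page) dirty; dirty the rest so modified data gets written back. */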
357 if (fSetDirty && !PageReserved(pPage))
358 SetPageDirty(pPage);
359#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
360 put_page(pPage);
361#else
362 page_cache_release(pPage);
363#endif
364 }
365}
366
367
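/* Note: the get_user_pages API changed several times: 4.0 added the _unlocked
 * variant, 4.6 dropped the task/mm parameters and 4.9 replaced the write/force
 * flags with a single gup_flags argument, hence the version ladder below. */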
368/** Wrapper around get_user_pages. */
369DECLINLINE(int) vbsf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages)
370{
371# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
372 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
373 fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
374# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
375 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
376# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
377 ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages,
378 fWrite, 1 /*force*/, papPages);
379# else
380 struct task_struct *pTask = current;
381 size_t cPagesLocked;
382 down_read(&pTask->mm->mmap_sem);
383 cPagesLocked = get_user_pages(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
384 up_read(&pTask->mm->mmap_sem);
385# endif
386 if (cPagesLocked == cPages)
387 return 0;
388 if (cPagesLocked < 0)
389 return cPagesLocked;
390
391 vbsf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/);
392
393 /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
394 return -EFAULT;
395}
396
397
398/**
399 * Read function used when accessing files that are memory mapped.
400 *
401 * We read from the page cache here to present a coherent picture of the
402 * file content.
403 */
404static ssize_t vbsf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
405{
406#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
407 struct iovec iov = { .iov_base = buf, .iov_len = size };
408 struct iov_iter iter;
409 struct kiocb kiocb;
410 ssize_t cbRet;
411
412 init_sync_kiocb(&kiocb, file);
413 kiocb.ki_pos = *off;
414 iov_iter_init(&iter, READ, &iov, 1, size);
415
416 cbRet = generic_file_read_iter(&kiocb, &iter);
417
418 *off = kiocb.ki_pos;
419 return cbRet;
420
421#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
422 struct iovec iov = { .iov_base = buf, .iov_len = size };
423 struct kiocb kiocb;
424 ssize_t cbRet;
425
426 init_sync_kiocb(&kiocb, file);
427 kiocb.ki_pos = *off;
428
429 cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off);
430 if (cbRet == -EIOCBQUEUED)
431 cbRet = wait_on_sync_kiocb(&kiocb);
432
433 *off = kiocb.ki_pos;
434 return cbRet;
435
436#else /* 2.6.18 or earlier: */
437 return generic_file_read(file, buf, size, off);
438#endif
439}
440
441
442/**
443 * Fallback case of vbsf_reg_read() that locks the user buffers and lets the host
444 * write directly to them.
445 */
446static ssize_t vbsf_reg_read_fallback(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off,
447 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
448{
449 /*
450 * Lock pages and execute the read, taking care not to pass the host
451 * more than it can handle in one go or more than we care to allocate
452 * page arrays for. The latter limit is set at just short of 32KB due
453 * to how the physical heap works.
454 */
455 struct page *apPagesStack[16];
456 struct page **papPages = &apPagesStack[0];
457 struct page **papPagesFree = NULL;
458 VBOXSFREADPGLSTREQ *pReq;
459 loff_t offFile = *off;
460 ssize_t cbRet = -ENOMEM;
461 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
462 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
463
464 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
465 while (!pReq && cMaxPages > 4) {
466 cMaxPages /= 2;
467 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
468 }
469 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
470 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
471 if (pReq && papPages) {
472 cbRet = 0;
473 for (;;) {
474 /*
475 * Figure out how much to process now and lock the user pages.
476 */
477 int rc;
478 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
479 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
480 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
481 if (cPages <= cMaxPages)
482 cbChunk = size;
483 else {
484 cPages = cMaxPages;
485 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
486 }
487
488 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages);
489 if (rc == 0) {
490 size_t iPage = cPages;
491 while (iPage-- > 0)
492 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
493 } else {
494 cbRet = rc;
495 break;
496 }
497
498 /*
499 * Issue the request and unlock the pages.
500 */
501 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
502
503 vbsf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/);
504
505 if (RT_SUCCESS(rc)) {
506 /*
507 * Success, advance position and buffer.
508 */
509 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
510 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
511 cbRet += cbActual;
512 offFile += cbActual;
513 buf = (uint8_t *)buf + cbActual;
514 size -= cbActual;
515
516 /*
517 * Are we done already? If so commit the new file offset.
518 */
519 if (!size || cbActual < cbChunk) {
520 *off = offFile;
521 break;
522 }
523 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
524 /*
525 * The host probably doesn't have enough heap to handle the
526 * request, reduce the page count and retry.
527 */
528 cMaxPages /= 4;
529 Assert(cMaxPages > 0);
530 } else {
531 /*
532 * If we've successfully read stuff, return it rather than
533 * the error. (Not sure if this is such a great idea...)
534 */
535 if (cbRet > 0)
536 *off = offFile;
537 else
538 cbRet = -EPROTO;
539 break;
540 }
541 }
542 }
543 if (papPagesFree)
544 kfree(papPages);
545 if (pReq)
546 VbglR0PhysHeapFree(pReq);
547 return cbRet;
548}
549
550
551/**
552 * Read from a regular file.
553 *
554 * @param file the file
555 * @param buf the buffer
556 * @param size length of the buffer
557 * @param off offset within the file (in/out).
558 * @returns the number of bytes read on success, a negative Linux error code otherwise
559 */
560static ssize_t vbsf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
561{
562 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
563 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
564 struct vbsf_reg_info *sf_r = file->private_data;
565 struct address_space *mapping = inode->i_mapping;
566
567 SFLOGFLOW(("vbsf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
568
569 if (!S_ISREG(inode->i_mode)) {
570 LogFunc(("read from non regular file %d\n", inode->i_mode));
571 return -EINVAL;
572 }
573
574 /** @todo XXX Check read permission according to inode->i_mode! */
575
576 if (!size)
577 return 0;
578
579 /*
580 * If there is a mapping and O_DIRECT isn't in effect, we must heed
581 * dirty pages in the mapping and read from them. For simplicity
582 * though, we just do page cache reading when there are writable
583 * mappings around with any kind of pages loaded.
584 */
585 if ( mapping
586 && mapping->nrpages > 0
587 && mapping_writably_mapped(mapping)
588 && !(file->f_flags & O_DIRECT)
589 && 1 /** @todo make this behaviour configurable */ )
590 return vbsf_reg_read_mapped(file, buf, size, off);
591
592 /*
593 * For small requests, try to use an embedded buffer provided we get a heap block
594 * that does not cross page boundaries (see host code).
595 */
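 /* An embedded request carries the payload in the same physical heap block as
  * the request header, so no separate page list is needed. */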
596 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
597 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size;
598 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
599 if ( pReq
600 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
601 ssize_t cbRet;
602 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size);
603 if (RT_SUCCESS(vrc)) {
604 cbRet = pReq->Parms.cb32Read.u.value32;
605 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
606 if (copy_to_user(buf, pReq->abData, cbRet) == 0)
607 *off += cbRet;
608 else
609 cbRet = -EFAULT;
610 } else
611 cbRet = -EPROTO;
612 VbglR0PhysHeapFree(pReq);
613 return cbRet;
614 }
615 if (pReq)
616 VbglR0PhysHeapFree(pReq);
617 }
618
619#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
620 /*
621 * For medium sized requests try use a bounce buffer.
622 */
623 if (size <= _64K /** @todo make this configurable? */) {
624 void *pvBounce = kmalloc(size, GFP_KERNEL);
625 if (pvBounce) {
626 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
627 if (pReq) {
628 ssize_t cbRet;
629 int vrc = VbglR0SfHostReqReadContig(sf_g->map.root, pReq, sf_r->Handle.hHost, *off,
630 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
631 if (RT_SUCCESS(vrc)) {
632 cbRet = pReq->Parms.cb32Read.u.value32;
633 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
634 if (copy_to_user(buf, pvBounce, cbRet) == 0)
635 *off += cbRet;
636 else
637 cbRet = -EFAULT;
638 } else
639 cbRet = -EPROTO;
640 VbglR0PhysHeapFree(pReq);
641 kfree(pvBounce);
642 return cbRet;
643 }
644 kfree(pvBounce);
645 }
646 }
647#endif
648
649 return vbsf_reg_read_fallback(file, buf, size, off, sf_g, sf_r);
650}
651
652
653/**
654 * Wrapper around invalidate_mapping_pages() for page cache invalidation so that
655 * the changes written via vbsf_reg_write are made visible to mmap users.
656 */
657DECLINLINE(void) vbsf_reg_write_invalidate_mapping_range(struct address_space *mapping, loff_t offStart, loff_t offEnd)
658{
659 /*
660 * Only bother with this if the mapping has any pages in it.
661 *
662 * Note! According to the docs, the last parameter, end, is inclusive (we
663 * would have named it 'last' to indicate this).
664 *
665 * Note! The pre-2.6.12 function might not do enough to ensure consistency
666 * when any of the pages in the range is already mapped.
667 */
668# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12)
669 if (mapping)
670 invalidate_inode_pages2_range(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
671# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 60)
672 if (mapping && mapping->nrpages > 0)
673 invalidate_mapping_pages(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
674# else
675 /** @todo ... */
676 RT_NOREF(mapping, offStart, offEnd);
677# endif
678}
679
680
681/**
682 * Fallback case of vbsf_reg_write() that locks the user buffers and lets the host
683 * write directly to them.
684 */
685static ssize_t vbsf_reg_write_fallback(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile,
686 struct inode *inode, struct vbsf_inode_info *sf_i,
687 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
688{
689 /*
690 * Lock pages and execute the write, taking care not to pass the host
691 * more than it can handle in one go or more than we care to allocate
692 * page arrays for. The latter limit is set at just short of 32KB due
693 * to how the physical heap works.
694 */
695 struct page *apPagesStack[16];
696 struct page **papPages = &apPagesStack[0];
697 struct page **papPagesFree = NULL;
698 VBOXSFWRITEPGLSTREQ *pReq;
699 ssize_t cbRet = -ENOMEM;
700 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
701 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
702
703 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
704 while (!pReq && cMaxPages > 4) {
705 cMaxPages /= 2;
706 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
707 }
708 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
709 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
710 if (pReq && papPages) {
711 cbRet = 0;
712 for (;;) {
713 /*
714 * Figure out how much to process now and lock the user pages.
715 */
716 int rc;
717 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
718 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
719 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
720 if (cPages <= cMaxPages)
721 cbChunk = size;
722 else {
723 cPages = cMaxPages;
724 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
725 }
726
727 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages);
728 if (rc == 0) {
729 size_t iPage = cPages;
730 while (iPage-- > 0)
731 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
732 } else {
733 cbRet = rc;
734 break;
735 }
736
737 /*
738 * Issue the request and unlock the pages.
739 */
740 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
741
742 vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/);
743
744 if (RT_SUCCESS(rc)) {
745 /*
746 * Success, advance position and buffer.
747 */
748 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
749 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
750 cbRet += cbActual;
751 offFile += cbActual;
752 buf = (uint8_t *)buf + cbActual;
753 size -= cbActual;
754 if (offFile > i_size_read(inode))
755 i_size_write(inode, offFile);
756 vbsf_reg_write_invalidate_mapping_range(inode->i_mapping, offFile - cbActual, offFile);
757
758 /*
759 * Are we done already? If so commit the new file offset.
760 */
761 if (!size || cbActual < cbChunk) {
762 *off = offFile;
763 break;
764 }
765 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
766 /*
767 * The host probably doesn't have enough heap to handle the
768 * request, reduce the page count and retry.
769 */
770 cMaxPages /= 4;
771 Assert(cMaxPages > 0);
772 } else {
773 /*
774 * If we've successfully written stuff, return it rather than
775 * the error. (Not sure if this is such a great idea...)
776 */
777 if (cbRet > 0)
778 *off = offFile;
779 else
780 cbRet = -EPROTO;
781 break;
782 }
783 sf_i->force_restat = 1; /* mtime (and size) may have changed */
784 }
785 }
786 if (papPagesFree)
787 kfree(papPages);
788 if (pReq)
789 VbglR0PhysHeapFree(pReq);
790 return cbRet;
791}
792
793
794/**
795 * Write to a regular file.
796 *
797 * @param file the file
798 * @param buf the buffer
799 * @param size length of the buffer
800 * @param off offset within the file
801 * @returns the number of bytes written on success, a negative Linux error code otherwise
802 */
803static ssize_t vbsf_reg_write(struct file *file, const char *buf, size_t size, loff_t * off)
804{
805 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
806 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
807 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
808 struct vbsf_reg_info *sf_r = file->private_data;
809 struct address_space *mapping = inode->i_mapping;
810 loff_t pos;
811
812 SFLOGFLOW(("vbsf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
813 BUG_ON(!sf_i);
814 BUG_ON(!sf_g);
815 BUG_ON(!sf_r);
816
817 if (!S_ISREG(inode->i_mode)) {
818 LogFunc(("write to non regular file %d\n", inode->i_mode));
819 return -EINVAL;
820 }
821
822 pos = *off;
823 /** @todo This should be handled by the host, it returning the new file
824 * offset when appending. We may have an outdated i_size value here! */
825 if (file->f_flags & O_APPEND)
826 pos = i_size_read(inode);
827
828 /** @todo XXX Check write permission according to inode->i_mode! */
829
830 if (!size) {
831 if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */
832 *off = pos;
833 return 0;
834 }
835
836 /*
837 * If there are active writable mappings, coordinate with any
838 * pending writes via those.
839 */
840 if ( mapping
841 && mapping->nrpages > 0
842 && mapping_writably_mapped(mapping)) {
843#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
844 int err = filemap_fdatawait_range(mapping, pos, pos + size - 1);
845 if (err)
846 return err;
847#else
848 /** @todo ... */
849#endif
850 }
851
852 /*
853 * For small requests, try to use an embedded buffer provided we get a heap block
854 * that does not cross page boundaries (see host code).
855 */
856 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
857 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size;
858 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
859 if ( pReq
860 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
861 ssize_t cbRet;
862 if (copy_from_user(pReq->abData, buf, size) == 0) {
863 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
864 pos, (uint32_t)size);
865 if (RT_SUCCESS(vrc)) {
866 cbRet = pReq->Parms.cb32Write.u.value32;
867 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
868 pos += cbRet;
869 *off = pos;
870 if (pos > i_size_read(inode))
871 i_size_write(inode, pos);
872 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
873 } else
874 cbRet = -EPROTO;
875 sf_i->force_restat = 1; /* mtime (and size) may have changed */
876 } else
877 cbRet = -EFAULT;
878
879 VbglR0PhysHeapFree(pReq);
880 return cbRet;
881 }
882 if (pReq)
883 VbglR0PhysHeapFree(pReq);
884 }
885
886#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
887 /*
888 * For medium sized requests try use a bounce buffer.
889 */
890 if (size <= _64K /** @todo make this configurable? */) {
891 void *pvBounce = kmalloc(size, GFP_KERNEL);
892 if (pvBounce) {
893 if (copy_from_user(pvBounce, buf, size) == 0) {
894 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
895 if (pReq) {
896 ssize_t cbRet;
897 int vrc = VbglR0SfHostReqWriteContig(sf_g->map.root, pReq, sf_r->handle, pos,
898 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
899 if (RT_SUCCESS(vrc)) {
900 cbRet = pReq->Parms.cb32Write.u.value32;
901 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
902 pos += cbRet;
903 *off = pos;
904 if (pos > i_size_read(inode))
905 i_size_write(inode, pos);
906 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
907 } else
908 cbRet = -EPROTO;
909 sf_i->force_restat = 1; /* mtime (and size) may have changed */
910 VbglR0PhysHeapFree(pReq);
911 kfree(pvBounce);
912 return cbRet;
913 }
914 kfree(pvBounce);
915 } else {
916 kfree(pvBounce);
917 return -EFAULT;
918 }
919 }
920 }
921#endif
922
923 return vbsf_reg_write_fallback(file, buf, size, off, pos, inode, sf_i, sf_g, sf_r);
924}
925
926
927/**
928 * Open a regular file.
929 *
930 * @param inode the inode
931 * @param file the file
932 * @returns 0 on success, Linux error code otherwise
933 */
934static int vbsf_reg_open(struct inode *inode, struct file *file)
935{
936 int rc, rc_linux = 0;
937 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
938 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
939 struct vbsf_reg_info *sf_r;
940#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
941 struct dentry *dentry = file_dentry(file);
942#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)
943 struct dentry *dentry = file->f_path.dentry;
944#else
945 struct dentry *dentry = file->f_dentry;
946#endif
947 VBOXSFCREATEREQ *pReq;
948 SHFLCREATEPARMS *pCreateParms; /* temp glue */
949
950 SFLOGFLOW(("vbsf_reg_open: inode=%p file=%p flags=%#x %s\n",
951 inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
952 BUG_ON(!sf_g);
953 BUG_ON(!sf_i);
954
955 sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
956 if (!sf_r) {
957 LogRelFunc(("could not allocate reg info\n"));
958 return -ENOMEM;
959 }
960
961 RTListInit(&sf_r->Handle.Entry);
962 sf_r->Handle.cRefs = 1;
963 sf_r->Handle.fFlags = SF_HANDLE_F_FILE | SF_HANDLE_F_MAGIC;
964 sf_r->Handle.hHost = SHFL_HANDLE_NIL;
965
966 /* Already open? */
967 if (sf_i->handle != SHFL_HANDLE_NIL) {
968 /*
969 * This inode was created with vbsf_create_worker(). Check the CreateFlags:
970 * O_CREAT, O_TRUNC: inherently true (file was just created). Not sure
971 * about the access flags (SHFL_CF_ACCESS_*).
972 */
973 sf_i->force_restat = 1;
974 sf_r->Handle.hHost = sf_i->handle;
975 sf_i->handle = SHFL_HANDLE_NIL;
976 file->private_data = sf_r;
977
978 sf_r->Handle.fFlags |= SF_HANDLE_F_READ | SF_HANDLE_F_WRITE; /** @todo check */
979 vbsf_handle_append(sf_i, &sf_r->Handle);
980 SFLOGFLOW(("vbsf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
981 return 0;
982 }
983
984 pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
985 if (!pReq) {
986 kfree(sf_r);
987 LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
988 return -ENOMEM;
989 }
990 memcpy(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
991 RT_ZERO(pReq->CreateParms);
992 pCreateParms = &pReq->CreateParms;
993 pCreateParms->Handle = SHFL_HANDLE_NIL;
994
995 /* We check the value of pCreateParms->Handle afterwards to find out if
996 * the call succeeded or failed, as the API does not seem to cleanly
997 * distinguish error and informational messages.
998 *
999 * Furthermore, we must set pCreateParms->Handle to SHFL_HANDLE_NIL to
1000 * make the shared folders host service use our fMode parameter */
1001
1002 if (file->f_flags & O_CREAT) {
1003 LogFunc(("O_CREAT set\n"));
1004 pCreateParms->CreateFlags |= SHFL_CF_ACT_CREATE_IF_NEW;
1005 /* We ignore O_EXCL, as the Linux kernel seems to call create
1006 beforehand itself, so O_EXCL should always fail. */
1007 if (file->f_flags & O_TRUNC) {
1008 LogFunc(("O_TRUNC set\n"));
1009 pCreateParms->CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1010 } else
1011 pCreateParms->CreateFlags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
1012 } else {
1013 pCreateParms->CreateFlags |= SHFL_CF_ACT_FAIL_IF_NEW;
1014 if (file->f_flags & O_TRUNC) {
1015 LogFunc(("O_TRUNC set\n"));
1016 pCreateParms->CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1017 }
1018 }
1019
1020 switch (file->f_flags & O_ACCMODE) {
1021 case O_RDONLY:
1022 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_READ;
1023 sf_r->Handle.fFlags |= SF_HANDLE_F_READ;
1024 break;
1025
1026 case O_WRONLY:
1027 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_WRITE;
1028 sf_r->Handle.fFlags |= SF_HANDLE_F_WRITE;
1029 break;
1030
1031 case O_RDWR:
1032 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_READWRITE;
1033 sf_r->Handle.fFlags |= SF_HANDLE_F_READ | SF_HANDLE_F_WRITE;
1034 break;
1035
1036 default:
1037 BUG();
1038 }
1039
1040 if (file->f_flags & O_APPEND) {
1041 LogFunc(("O_APPEND set\n"));
1042 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_APPEND;
1043 sf_r->Handle.fFlags |= SF_HANDLE_F_APPEND;
1044 }
1045
1046 pCreateParms->Info.Attr.fMode = inode->i_mode;
1047 LogFunc(("vbsf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n", sf_i->path->String.utf8, file->f_flags, pCreateParms->CreateFlags));
1048 rc = VbglR0SfHostReqCreate(sf_g->map.root, pReq);
1049 if (RT_FAILURE(rc)) {
1050 LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pCreateParms->CreateFlags, rc));
1051 kfree(sf_r);
1052 VbglR0PhysHeapFree(pReq);
1053 return -RTErrConvertToErrno(rc);
1054 }
1055
1056 if (pCreateParms->Handle != SHFL_HANDLE_NIL) {
1057 vbsf_dentry_chain_increase_ttl(dentry);
1058 rc_linux = 0;
1059 } else {
1060 switch (pCreateParms->Result) {
1061 case SHFL_PATH_NOT_FOUND:
1062 rc_linux = -ENOENT;
1063 break;
1064 case SHFL_FILE_NOT_FOUND:
1065 /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
1066 rc_linux = -ENOENT;
1067 break;
1068 case SHFL_FILE_EXISTS:
1069 vbsf_dentry_chain_increase_ttl(dentry);
1070 rc_linux = -EEXIST;
1071 break;
1072 default:
1073 vbsf_dentry_chain_increase_parent_ttl(dentry);
1074 rc_linux = 0;
1075 break;
1076 }
1077 }
1078
1079 sf_i->force_restat = 1; /** @todo Why?!? */
1080 sf_r->Handle.hHost = pCreateParms->Handle;
1081 file->private_data = sf_r;
1082 vbsf_handle_append(sf_i, &sf_r->Handle);
1083 VbglR0PhysHeapFree(pReq);
1084 SFLOGFLOW(("vbsf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
1085 return rc_linux;
1086}
1087
1088
1089/**
1090 * Close a regular file.
1091 *
1092 * @param inode the inode
1093 * @param file the file
1094 * @returns 0 on success, Linux error code otherwise
1095 */
1096static int vbsf_reg_release(struct inode *inode, struct file *file)
1097{
1098 struct vbsf_reg_info *sf_r;
1099 struct vbsf_super_info *sf_g;
1100 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1101
1102 SFLOGFLOW(("vbsf_reg_release: inode=%p file=%p\n", inode, file));
1103 sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1104 sf_r = file->private_data;
1105
1106 BUG_ON(!sf_g);
1107 BUG_ON(!sf_r);
1108
1109#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 25)
1110 /* See the smbfs source (file.c). mmap in particular can cause data to be
1111 * written to the file after it is closed, which we can't cope with. We
1112 * copy and paste the body of filemap_write_and_wait() here as it was not
1113 * defined before 2.6.6 and not exported until quite a bit later. */
1114 /* filemap_write_and_wait(inode->i_mapping); */
1115 if (inode->i_mapping->nrpages
1116 && filemap_fdatawrite(inode->i_mapping) != -EIO)
1117 filemap_fdatawait(inode->i_mapping);
1118#endif
1119
1120 /* Release sf_r, closing the handle if we're the last user. */
1121 file->private_data = NULL;
1122 vbsf_handle_release(&sf_r->Handle, sf_g, "vbsf_reg_release");
1123
1124 sf_i->handle = SHFL_HANDLE_NIL;
1125 return 0;
1126}
1127
1128/**
1129 * Wrapper around generic/default seek function that ensures that we've got
1130 * the up-to-date file size when doing anything relative to EOF.
1131 *
1132 * The issue is that the host may extend the file while we weren't looking and
1133 * if the caller wishes to append data, it may end up overwriting existing data
1134 * if we operate with a stale size. So, we always retrieve the file size on EOF
1135 * relative seeks.
1136 */
1137static loff_t vbsf_reg_llseek(struct file *file, loff_t off, int whence)
1138{
1139 SFLOGFLOW(("vbsf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
1140
1141 switch (whence) {
1142#ifdef SEEK_HOLE
1143 case SEEK_HOLE:
1144 case SEEK_DATA:
1145#endif
1146 case SEEK_END: {
1147 struct vbsf_reg_info *sf_r = file->private_data;
1148 int rc = vbsf_inode_revalidate_with_handle(VBSF_GET_F_DENTRY(file), sf_r->Handle.hHost, true /*fForce*/,
1149 false /*fInodeLocked*/);
1150 if (rc == 0)
1151 break;
1152 return rc;
1153 }
1154 }
1155
1156#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8)
1157 return generic_file_llseek(file, off, whence);
1158#else
1159 return default_llseek(file, off, whence);
1160#endif
1161}
1162
1163/**
1164 * Flush region of file - chiefly mmap/msync.
1165 *
1166 * We cannot use the noop_fsync / simple_sync_file here as that means
1167 * msync(,,MS_SYNC) will return before the data hits the host, thereby
1168 * causing coherency issues with O_DIRECT access to the same file as
1169 * well as any host interaction with the file.
1170 */
1171#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
1172static int vbsf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1173{
1174# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1175 return __generic_file_fsync(file, start, end, datasync);
1176# else
1177 return generic_file_fsync(file, start, end, datasync);
1178# endif
1179}
1180#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
1181static int vbsf_reg_fsync(struct file *file, int datasync)
1182{
1183 return generic_file_fsync(file, datasync);
1184}
1185#else /* < 2.6.35 */
1186static int vbsf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
1187{
1188# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
1189 return simple_fsync(file, dentry, datasync);
1190# else
1191 int rc;
1192 struct inode *inode = dentry->d_inode;
1193 AssertReturn(inode, -EINVAL);
1194
1195 /** @todo What about file_fsync()? (<= 2.5.11) */
1196
1197# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1198 rc = sync_mapping_buffers(inode->i_mapping);
1199 if ( rc == 0
1200 && (inode->i_state & I_DIRTY)
1201 && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
1202 ) {
1203 struct writeback_control wbc = {
1204 .sync_mode = WB_SYNC_ALL,
1205 .nr_to_write = 0
1206 };
1207 rc = sync_inode(inode, &wbc);
1208 }
1209# else /* < 2.5.12 */
1210 rc = fsync_inode_buffers(inode);
1211# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1212 rc |= fsync_inode_data_buffers(inode);
1213# endif
1214 /** @todo probably need to do more here... */
1215# endif /* < 2.5.12 */
1216 return rc;
1217# endif
1218}
1219#endif /* < 2.6.35 */
1220
1221
1222/**
1223 * File operations for regular files.
1224 */
1225struct file_operations vbsf_reg_fops = {
1226 .read = vbsf_reg_read,
1227 .open = vbsf_reg_open,
1228 .write = vbsf_reg_write,
1229 .release = vbsf_reg_release,
1230 .mmap = generic_file_mmap,
1231#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1232# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
1233/** @todo This code is known to cause caching of data which should not be
1234 * cached. Investigate. */
1235# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
1236 .splice_read = vbsf_splice_read,
1237# else
1238 .sendfile = generic_file_sendfile,
1239# endif
1240 .aio_read = generic_file_aio_read,
1241 .aio_write = generic_file_aio_write,
1242# endif
1243#endif
1244 .llseek = vbsf_reg_llseek,
1245 .fsync = vbsf_reg_fsync,
1246};
1247
1248struct inode_operations vbsf_reg_iops = {
1249#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
1250 .revalidate = vbsf_inode_revalidate
1251#else
1252 .getattr = vbsf_inode_getattr,
1253 .setattr = vbsf_inode_setattr
1254#endif
1255};
1256
1257
1258#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1259
1260/**
1261 * Used to read the content of a page into the page cache.
1262 *
1263 * Needed for mmap and reads+writes when the file is mmapped in a
1264 * shared+writeable fashion.
1265 */
1266static int vbsf_readpage(struct file *file, struct page *page)
1267{
1268 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
1269 int err;
1270
1271 SFLOGFLOW(("vbsf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT));
1272
1273 if (!is_bad_inode(inode)) {
1274 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1275 if (pReq) {
1276 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1277 struct vbsf_reg_info *sf_r = file->private_data;
1278 uint32_t cbRead;
1279 int vrc;
1280
1281 pReq->PgLst.offFirstPage = 0;
1282 pReq->PgLst.aPages[0] = page_to_phys(page);
1283 vrc = VbglR0SfHostReqReadPgLst(sf_g->map.root,
1284 pReq,
1285 sf_r->Handle.hHost,
1286 (uint64_t)page->index << PAGE_SHIFT,
1287 PAGE_SIZE,
1288 1 /*cPages*/);
1289
1290 cbRead = pReq->Parms.cb32Read.u.value32;
1291 AssertStmt(cbRead <= PAGE_SIZE, cbRead = PAGE_SIZE);
1292 VbglR0PhysHeapFree(pReq);
1293
1294 if (RT_SUCCESS(vrc)) {
1295 if (cbRead == PAGE_SIZE) {
1296 /* likely */
1297 } else {
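 /* Short read: zero the remainder of the page so no stale data is exposed. */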
1298 uint8_t *pbMapped = (uint8_t *)kmap(page);
1299 RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead);
1300 kunmap(page);
1301 /** @todo truncate the inode file size? */
1302 }
1303
1304 flush_dcache_page(page);
1305 SetPageUptodate(page);
1306 err = 0;
1307 } else
1308 err = -EPROTO;
1309 } else
1310 err = -ENOMEM;
1311 } else
1312 err = -EIO;
1313 unlock_page(page);
1314 return err;
1315}
1316
1317
1318/**
1319 * Used to write out the content of a dirty page cache page to the host file.
1320 *
1321 * Needed for mmap and writes when the file is mmapped in a shared+writeable
1322 * fashion.
1323 */
1324static int vbsf_writepage(struct page *page, struct writeback_control *wbc)
1325{
1326 struct address_space *mapping = page->mapping;
1327 struct inode *inode = mapping->host;
1328 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1329 struct sf_handle *pHandle = vbsf_handle_find(sf_i, SF_HANDLE_F_WRITE, SF_HANDLE_F_APPEND);
1330 int err;
1331
1332 SFLOGFLOW(("vbsf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n",
1333 inode, page, (uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle ? pHandle->hHost : SHFL_HANDLE_NIL));
1334
1335 if (pHandle) {
1336 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1337 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1338 if (pReq) {
1339 uint64_t const cbFile = i_size_read(inode);
1340 uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT;
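 /* Write a whole page unless this is the file's tail page, in which case only write up to the current end of the file. */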
1341 uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? PAGE_SIZE
1342 : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK;
1343 int vrc;
1344
1345 pReq->PgLst.offFirstPage = 0;
1346 pReq->PgLst.aPages[0] = page_to_phys(page);
1347 vrc = VbglR0SfHostReqWritePgLst(sf_g->map.root,
1348 pReq,
1349 pHandle->hHost,
1350 offInFile,
1351 cbToWrite,
1352 1 /*cPages*/);
1353 AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */
1354 ("%#x vs %#x\n", pReq->Parms.cb32Write, cbToWrite),
1355 vrc = VERR_WRITE_ERROR);
1356 VbglR0PhysHeapFree(pReq);
1357
1358 if (RT_SUCCESS(vrc)) {
1359 /* Update the inode if we've extended the file. */
1360 /** @todo is this necessary given the cbToWrite calc above? */
1361 uint64_t const offEndOfWrite = offInFile + cbToWrite;
1362 if ( offEndOfWrite > cbFile
1363 && offEndOfWrite > i_size_read(inode))
1364 i_size_write(inode, offEndOfWrite);
1365
1366 if (PageError(page))
1367 ClearPageError(page);
1368
1369 err = 0;
1370 } else {
1371 ClearPageUptodate(page);
1372 err = -EPROTO;
1373 }
1374 } else
1375 err = -ENOMEM;
1376 vbsf_handle_release(pHandle, sf_g, "vbsf_writepage");
1377 } else {
1378 static uint64_t volatile s_cCalls = 0;
1379 if (s_cCalls++ < 16)
1380 printk("vbsf_writepage: no writable handle for %s..\n", sf_i->path->String.ach);
1381 err = -EPROTO;
1382 }
1383 unlock_page(page);
1384 return err;
1385}
1386
1387# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1388/**
1389 * Called when writing thru the page cache (which we shouldn't be doing).
1390 */
1391int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
1392 unsigned len, unsigned flags, struct page **pagep, void **fsdata)
1393{
1394 /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use
1395 * the page cache for any writes AFAIK. We could just as well use
1396 * simple_write_begin & simple_write_end here if we think we really
1397 * need to have non-NULL function pointers in the table... */
1398 static uint64_t volatile s_cCalls = 0;
1399 if (s_cCalls++ < 16) {
1400 printk("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1401 (unsigned long long)pos, len, flags);
1402 RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1403 (unsigned long long)pos, len, flags);
1404# ifdef WARN_ON
1405 WARN_ON(1);
1406# endif
1407 }
1408 return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
1409}
1410# endif /* KERNEL_VERSION >= 2.6.24 */
1411
1412# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1413/**
1414 * This is needed to make open accept O_DIRECT as well as dealing with direct
1415 * I/O requests if we don't intercept them earlier.
1416 */
1417# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
1418static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
1419# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
1420static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1421# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1422static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1423# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 6)
1424static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1425# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 55)
1426static int vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1427# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 41)
1428static int vbsf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1429# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 35)
1430static int vbsf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1431# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 26)
1432static int vbsf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count)
1433# else
1434static int vbsf_direct_IO(int rw, struct inode *inode, struct kiobuf *, unsigned long, int)
1435# endif
1436{
1437 TRACE();
1438 return -EINVAL;
1439}
1440# endif
1441
1442/**
1443 * Address space (for the page cache) operations for regular files.
1444 */
1445struct address_space_operations vbsf_reg_aops = {
1446 .readpage = vbsf_readpage,
1447 .writepage = vbsf_writepage,
1448 /** @todo Need .writepages if we want msync performance... */
1449# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1450 .set_page_dirty = __set_page_dirty_buffers,
1451# endif
1452# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1453 .write_begin = vbsf_write_begin,
1454 .write_end = simple_write_end,
1455# else
1456 .prepare_write = simple_prepare_write,
1457 .commit_write = simple_commit_write,
1458# endif
1459# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1460 .direct_IO = vbsf_direct_IO,
1461# endif
1462};
1463
1464#endif /* LINUX_VERSION_CODE >= 2.6.0 */
1465