VirtualBox

source: vbox/trunk/src/VBox/Additions/linux/sharedfolders/regops.c@77661

Last change on this file since 77661 was 77631, checked in by vboxsync, 6 years ago

linux/vboxsf: Implemented write_iter as well. Can write to loop mounted ext4 image now. bugref:9172 ticketref:17360

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 89.4 KB
 
1/* $Id: regops.c 77631 2019-03-10 04:14:09Z vboxsync $ */
2/** @file
3 * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
16 * conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
29 */
30
31
32/*********************************************************************************************************************************
33* Header Files *
34*********************************************************************************************************************************/
35#include "vfsmod.h"
36#include <linux/uio.h>
37#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 32)
38# include <linux/aio.h> /* struct kiocb before 4.1 */
39#endif
40#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
41# include <linux/buffer_head.h>
42#endif
43#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
44 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
45# include <linux/writeback.h>
46#endif
47#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
48 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
49# include <linux/splice.h>
50#endif
51#include <iprt/err.h>
52
53#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18)
54# define SEEK_END 2
55#endif
56
57
58/*********************************************************************************************************************************
59* Structures and Typedefs *
60*********************************************************************************************************************************/
61#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
62/** Used by vbsf_iter_lock_pages() to keep the first page of the next segment. */
63struct vbsf_iter_stash {
64 struct page *pPage;
65 size_t off;
66 size_t cb;
67# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
68 size_t offFromEnd;
69 struct iov_iter Copy;
70# endif
71};
72#endif /* >= 3.16.0 */
73/** Initializer for struct vbsf_iter_stash. */
74#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
75# define VBSF_ITER_STASH_INITIALIZER { NULL, 0 }
76#else
77# define VBSF_ITER_STASH_INITIALIZER { NULL, 0, ~(size_t)0 }
78#endif
79
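/*
 * Usage sketch for the stash: callers declare it on the stack and initialize
 * it with the macro above before entering their I/O loop, e.g.
 *
 *     struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
 *
 * as done by vbsf_reg_read_iter_locking() and vbsf_reg_write_iter_locking()
 * further down in this file.
 */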
80
81
82/**
83 * Called when an inode is released to unlink all handles that might possibly
84 * still be associated with it.
85 *
86 * @param pInodeInfo The inode whose handles should be dropped.
87 */
88void vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo)
89{
90 struct vbsf_handle *pCur, *pNext;
91 unsigned long fSavedFlags;
92 SFLOGFLOW(("vbsf_handle_drop_chain: %p\n", pInodeInfo));
93 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
94
95 RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct vbsf_handle, Entry) {
96 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
97 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
98 pCur->fFlags &= ~VBSF_HANDLE_F_ON_LIST; /* it is being unlinked just below */
99 RTListNodeRemove(&pCur->Entry);
100 }
101
102 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
103}
104
105
106/**
107 * Locates a handle that has all the flags in @a fFlagsSet set and all the flags in @a fFlagsClear clear.
108 *
109 * @returns Pointer to handle on success (retained), use vbsf_handle_release() to
110 * release it. NULL if no suitable handle was found.
111 * @param pInodeInfo The inode info to search.
112 * @param fFlagsSet The flags that must be set.
113 * @param fFlagsClear The flags that must be clear.
114 */
115struct vbsf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
116{
117 struct vbsf_handle *pCur;
118 unsigned long fSavedFlags;
119 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
120
121 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
122 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
123 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
124 if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
125 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
126 if (cRefs > 1) {
127 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
128 SFLOGFLOW(("vbsf_handle_find: returns %p\n", pCur));
129 return pCur;
130 }
131 /* Oops, already being closed (safe as it's only ever increased here). */
132 ASMAtomicDecU32(&pCur->cRefs);
133 }
134 }
135
136 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
137 SFLOGFLOW(("vbsf_handle_find: returns NULL!\n"));
138 return NULL;
139}
140
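/*
 * Illustrative caller sketch for the handle cache above (flag and parameter
 * names are indicative only, see vfsmod.h): look up a retained handle with the
 * required access bits and release it again when done, using the
 * vbsf_handle_release() counterpart mentioned in the vbsf_handle_find() docs.
 *
 *     struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, 0);
 *     if (pHandle) {
 *         // ... issue host I/O against pHandle->hHost ...
 *         vbsf_handle_release(pHandle, sf_g, "example-caller");
 *     } else {
 *         // ... fall back to opening a temporary host handle ...
 *     }
 */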
141
142/**
143 * Slow worker for vbsf_handle_release() that does the freeing.
144 *
145 * @returns 0 (ref count).
146 * @param pHandle The handle to release.
147 * @param sf_g The info structure for the shared folder associated
148 * with the handle.
149 * @param pszCaller The caller name (for logging failures).
150 */
151uint32_t vbsf_handle_release_slow(struct vbsf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
152{
153 int rc;
154 unsigned long fSavedFlags;
155
156 SFLOGFLOW(("vbsf_handle_release_slow: %p (%s)\n", pHandle, pszCaller));
157
158 /*
159 * Remove from the list.
160 */
161 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
162
163 AssertMsg((pHandle->fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags));
164 Assert(pHandle->pInodeInfo);
165 Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
166
167 if (pHandle->fFlags & VBSF_HANDLE_F_ON_LIST) {
168 pHandle->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
169 RTListNodeRemove(&pHandle->Entry);
170 }
171
172 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
173
174 /*
175 * Actually destroy it.
176 */
177 rc = VbglR0SfHostReqCloseSimple(sf_g->map.root, pHandle->hHost);
178 if (RT_FAILURE(rc))
179 LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc));
180 pHandle->hHost = SHFL_HANDLE_NIL;
181 pHandle->fFlags = VBSF_HANDLE_F_MAGIC_DEAD;
182 kfree(pHandle);
183 return 0;
184}
185
186
187/**
188 * Appends a handle to a handle list.
189 *
190 * @param pInodeInfo The inode to add it to.
191 * @param pHandle The handle to add.
192 */
193void vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct vbsf_handle *pHandle)
194{
195#ifdef VBOX_STRICT
196 struct vbsf_handle *pCur;
197#endif
198 unsigned long fSavedFlags;
199
200 SFLOGFLOW(("vbsf_handle_append: %p (to %p)\n", pHandle, pInodeInfo));
201 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
202 ("%p %#x\n", pHandle, pHandle->fFlags));
203 Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
204
205 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
206
207 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
208 ("%p %#x\n", pHandle, pHandle->fFlags));
209#ifdef VBOX_STRICT
210 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
211 Assert(pCur != pHandle);
212 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
213 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
214 }
215 pHandle->pInodeInfo = pInodeInfo;
216#endif
217
218 pHandle->fFlags |= VBSF_HANDLE_F_ON_LIST;
219 RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry);
220
221 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
222}
223
224
225#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
226 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
227
228/*
229 * Some pipe stuff we apparently need for 2.6.23-2.6.30.
230 */
231
232static void vbsf_free_pipebuf(struct page *kpage)
233{
234 kunmap(kpage);
235 __free_pages(kpage, 0);
236}
237
238static void *vbsf_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, int atomic)
239{
240 return 0;
241}
242
243static void vbsf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
244{
245}
246
247static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, void *map_data)
248{
249}
250
251static int vbsf_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
252{
253 return 0;
254}
255
256static void vbsf_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
257{
258 vbsf_free_pipebuf(pipe_buf->page);
259}
260
261static int vbsf_pipe_buf_confirm(struct pipe_inode_info *info, struct pipe_buffer *pipe_buf)
262{
263 return 0;
264}
265
266static struct pipe_buf_operations vbsf_pipe_buf_ops = {
267 .can_merge = 0,
268 .map = vbsf_pipe_buf_map,
269 .unmap = vbsf_pipe_buf_unmap,
270 .confirm = vbsf_pipe_buf_confirm,
271 .release = vbsf_pipe_buf_release,
272 .steal = vbsf_pipe_buf_steal,
273 .get = vbsf_pipe_buf_get,
274};
275
276static int vbsf_reg_read_aux(const char *caller, struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
277 void *buf, uint32_t *nread, uint64_t pos)
278{
279 int rc = VbglR0SfRead(&g_SfClient, &sf_g->map, sf_r->Handle.hHost, pos, nread, buf, false /* already locked? */ );
280 if (RT_FAILURE(rc)) {
281 LogFunc(("VbglR0SfRead failed. caller=%s, rc=%Rrc\n", caller,
282 rc));
283 return -EPROTO;
284 }
285 return 0;
286}
287
288# define LOCK_PIPE(pipe) do { if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); } while (0)
289# define UNLOCK_PIPE(pipe) do { if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); } while (0)
290
291ssize_t vbsf_splice_read(struct file *in, loff_t * poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
292{
293 size_t bytes_remaining = len;
294 loff_t orig_offset = *poffset;
295 loff_t offset = orig_offset;
296 struct inode *inode = VBSF_GET_F_DENTRY(in)->d_inode;
297 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
298 struct vbsf_reg_info *sf_r = in->private_data;
299 ssize_t retval;
300 struct page *kpage = 0;
301 size_t nsent = 0;
302
303/** @todo rig up a FsPerf test for this code */
304 TRACE();
305 if (!S_ISREG(inode->i_mode)) {
306 LogFunc(("read from non regular file %d\n", inode->i_mode));
307 return -EINVAL;
308 }
309 if (!len) {
310 return 0;
311 }
312
313 LOCK_PIPE(pipe);
314
315 uint32_t req_size = 0;
316 while (bytes_remaining > 0) {
317 kpage = alloc_page(GFP_KERNEL);
318 if (unlikely(kpage == NULL)) {
319 UNLOCK_PIPE(pipe);
320 return -ENOMEM;
321 }
322 req_size = 0;
323 uint32_t nread = req_size = (uint32_t) min(bytes_remaining, (size_t) PAGE_SIZE);
324 uint32_t chunk = 0;
325 void *kbuf = kmap(kpage);
326 while (chunk < req_size) {
327 retval = vbsf_reg_read_aux(__func__, sf_g, sf_r, kbuf + chunk, &nread, offset);
328 if (retval < 0)
329 goto err;
330 if (nread == 0)
331 break;
332 chunk += nread;
333 offset += nread;
334 nread = req_size - chunk;
335 }
336 if (!pipe->readers) {
337 send_sig(SIGPIPE, current, 0);
338 retval = -EPIPE;
339 goto err;
340 }
341 if (pipe->nrbufs < PIPE_BUFFERS) {
342 struct pipe_buffer *pipebuf = pipe->bufs + ((pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1));
343 pipebuf->page = kpage;
344 pipebuf->ops = &vbsf_pipe_buf_ops;
345 pipebuf->len = req_size;
346 pipebuf->offset = 0;
347 pipebuf->private = 0;
348 pipebuf->flags = 0;
349 pipe->nrbufs++;
350 nsent += req_size;
351 bytes_remaining -= req_size;
352 if (signal_pending(current))
353 break;
354 } else { /* pipe full */
355
356 if (flags & SPLICE_F_NONBLOCK) {
357 retval = -EAGAIN;
358 goto err;
359 }
360 vbsf_free_pipebuf(kpage);
361 break;
362 }
363 }
364 UNLOCK_PIPE(pipe);
365 if (!nsent && signal_pending(current))
366 return -ERESTARTSYS;
367 *poffset += nsent;
368 return offset - orig_offset;
369
370 err:
371 UNLOCK_PIPE(pipe);
372 vbsf_free_pipebuf(kpage);
373 return retval;
374}
375
376#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
377
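/*
 * Hook-up note for the 2.6.23-2.6.30 code above: vbsf_splice_read() is
 * presumably registered through the .splice_read member of the regular-file
 * file_operations table, e.g.
 *
 *     .splice_read = vbsf_splice_read,
 *
 * with the table itself defined further down in regops.c.
 */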
378/**
379 * Helper for deciding whether we should do a read via the page cache or not.
380 *
381 * By default we will only use the page cache if there is a writable memory
382 * mapping of the file with a chance that it may have modified any of the pages
383 * already.
384 */
385DECLINLINE(bool) vbsf_should_use_cached_read(struct file *file, struct address_space *mapping, struct vbsf_super_info *sf_g)
386{
387 return mapping
388 && mapping->nrpages > 0
389 && mapping_writably_mapped(mapping)
390 && !(file->f_flags & O_DIRECT)
391 && 1 /** @todo make this behaviour configurable at mount time (sf_g) */;
392}
393
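/*
 * Concrete scenario for the check above: process A mmap()s the file writable
 * and dirties a page, then process B read()s the same range.  Going through
 * the page cache lets B observe A's not-yet-flushed modification, whereas
 * reading straight from the host would return stale data.
 */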
394/** Wrapper around put_page / page_cache_release. */
395DECLINLINE(void) vbsf_put_page(struct page *pPage)
396{
397#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
398 put_page(pPage);
399#else
400 page_cache_release(pPage);
401#endif
402}
403
404
405/** Wrapper around get_page / page_cache_get. */
406DECLINLINE(void) vbsf_get_page(struct page *pPage)
407{
408#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
409 get_page(pPage);
410#else
411 page_cache_get(pPage);
412#endif
413}
414
415
416/** Companion to vbsf_lock_user_pages(). */
417DECLINLINE(void) vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty, bool fLockPgHack)
418{
419 /* We don't mark kernel pages dirty: */
420 if (fLockPgHack)
421 fSetDirty = false;
422
423 while (cPages-- > 0)
424 {
425 struct page *pPage = papPages[cPages];
426 if (fSetDirty && !PageReserved(pPage))
427 SetPageDirty(pPage);
428 vbsf_put_page(pPage);
429 }
430}
431
432
433/**
434 * Worker for vbsf_lock_user_pages_failed_check_kernel() and
435 * vbsf_iter_lock_pages().
436 */
437static int vbsf_lock_kernel_pages(uint8_t *pbStart, bool fWrite, size_t cPages, struct page **papPages)
438{
439 uintptr_t const uPtrFrom = (uintptr_t)pbStart;
440 uintptr_t const uPtrLast = (uPtrFrom & ~(uintptr_t)PAGE_OFFSET_MASK) + (cPages << PAGE_SHIFT) - 1;
441 uint8_t *pbPage = (uint8_t *)uPtrLast;
442 size_t iPage = cPages;
443
444 /*
445 * Touch the pages first (paranoia^2).
446 */
447 if (fWrite) {
448 uint8_t volatile *pbProbe = (uint8_t volatile *)uPtrFrom;
449 while (iPage-- > 0) {
450 *pbProbe = *pbProbe;
451 pbProbe += PAGE_SIZE;
452 }
453 } else {
454 uint8_t const *pbProbe = (uint8_t const *)uPtrFrom;
455 while (iPage-- > 0) {
456 ASMProbeReadByte(pbProbe);
457 pbProbe += PAGE_SIZE;
458 }
459 }
460
461 /*
462 * Get the pages.
463 * Note! Fixes here probably apply to rtR0MemObjNativeLockKernel as well.
464 */
465 iPage = cPages;
466 if ( uPtrFrom >= (unsigned long)__va(0)
467 && uPtrLast < (unsigned long)high_memory) {
468 /* The physical page mapping area: */
469 while (iPage-- > 0) {
470 struct page *pPage = papPages[iPage] = virt_to_page(pbPage);
471 vbsf_get_page(pPage);
472 pbPage -= PAGE_SIZE;
473 }
474 } else {
475 /* This is vmalloc or some such thing, so go thru page tables: */
476 while (iPage-- > 0) {
477 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage);
478 if (pPage) {
479 papPages[iPage] = pPage;
480 vbsf_get_page(pPage);
481 pbPage -= PAGE_SIZE;
482 } else {
483 while (++iPage < cPages) {
484 pPage = papPages[iPage];
485 vbsf_put_page(pPage);
486 }
487 return -EFAULT;
488 }
489 }
490 }
491 return 0;
492}
493
494
495/**
496 * Catches kernel_read() and kernel_write() calls and works around them.
497 *
498 * The file_operations::read and file_operations::write callbacks supposedly
499 * hand us the user buffers to read into and write out of. To allow the kernel
500 * to read and write without allocating buffers in userland, kernel_read()
501 * and kernel_write() increase the user space address limit before calling us
502 * so that copyin/copyout won't reject it. Our problem is that get_user_pages()
503 * works on the userspace address space structures and will not be fooled by an
504 * increased addr_limit.
505 *
506 * This code tries to detect this situation and fake the user-page locking for
507 * the kernel buffer.
508 */
509static int vbsf_lock_user_pages_failed_check_kernel(uintptr_t uPtrFrom, size_t cPages, bool fWrite, int rcFailed,
510 struct page **papPages, bool *pfLockPgHack)
511{
512 /*
513 * Check that this is valid user memory that is actually in the kernel range.
514 */
515#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
516 if ( access_ok((void *)uPtrFrom, cPages << PAGE_SHIFT)
517 && uPtrFrom >= USER_DS.seg)
518#else
519 if ( access_ok(fWrite ? VERIFY_WRITE : VERIFY_READ, (void *)uPtrFrom, cPages << PAGE_SHIFT)
520 && uPtrFrom >= USER_DS.seg)
521#endif
522 {
523 int rc = vbsf_lock_kernel_pages((uint8_t *)uPtrFrom, fWrite, cPages, papPages);
524 if (rc == 0) {
525 *pfLockPgHack = true;
526 return 0;
527 }
528 }
529
530 return rcFailed;
531}
532
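/*
 * Background sketch for the workaround above: before the set_fs() machinery
 * was removed, kernel_read()/kernel_write() looked roughly like this
 * (paraphrased from older kernels, not verbatim), which is why the "user"
 * pointer handed to us can in fact be a kernel address that get_user_pages()
 * cannot resolve:
 *
 *     mm_segment_t old_fs = get_fs();
 *     set_fs(KERNEL_DS);                  // widen addr_limit to kernel space
 *     // the __user cast is only "valid" because of the set_fs() above
 *     result = vfs_read(file, (void __user *)kernel_buf, count, &pos);
 *     set_fs(old_fs);
 */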
533
534/** Wrapper around get_user_pages. */
535DECLINLINE(int) vbsf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages, bool *pfLockPgHack)
536{
537# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
538 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
539 fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
540# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
541 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
542# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
543 ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
544# else
545 struct task_struct *pTask = current;
546 size_t cPagesLocked;
547 down_read(&pTask->mm->mmap_sem);
548 cPagesLocked = get_user_pages(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
549 up_read(&pTask->mm->mmap_sem);
550# endif
551 *pfLockPgHack = false;
552 if (cPagesLocked == cPages)
553 return 0;
554
555 /*
556 * It failed.
557 */
558 if (cPagesLocked < 0)
559 return vbsf_lock_user_pages_failed_check_kernel(uPtrFrom, cPages, fWrite, (int)cPagesLocked, papPages, pfLockPgHack);
560
561 vbsf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/, false /*fLockPgHack*/);
562
563 /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
564 return -EFAULT;
565}
566
567
568/**
569 * Read function used when accessing files that are memory mapped.
570 *
571 * We read from the page cache here to present the a cohertent picture of the
572 * the file content.
573 */
574static ssize_t vbsf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
575{
576#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
577 struct iovec iov = { .iov_base = buf, .iov_len = size };
578 struct iov_iter iter;
579 struct kiocb kiocb;
580 ssize_t cbRet;
581
582 init_sync_kiocb(&kiocb, file);
583 kiocb.ki_pos = *off;
584 iov_iter_init(&iter, READ, &iov, 1, size);
585
586 cbRet = generic_file_read_iter(&kiocb, &iter);
587
588 *off = kiocb.ki_pos;
589 return cbRet;
590
591#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
592 struct iovec iov = { .iov_base = buf, .iov_len = size };
593 struct kiocb kiocb;
594 ssize_t cbRet;
595
596 init_sync_kiocb(&kiocb, file);
597 kiocb.ki_pos = *off;
598
599 cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off);
600 if (cbRet == -EIOCBQUEUED)
601 cbRet = wait_on_sync_kiocb(&kiocb);
602
603 *off = kiocb.ki_pos;
604 return cbRet;
605
606#else /* 2.6.18 or earlier: */
607 return generic_file_read(file, buf, size, off);
608#endif
609}
610
611
612/**
613 * Fallback case of vbsf_reg_read() that locks the user buffers and lets the
614 * host write directly into them.
615 */
616static ssize_t vbsf_reg_read_locking(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off,
617 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
618{
619 /*
620 * Lock pages and execute the read, taking care not to pass the host
621 * more than it can handle in one go or more than we care to allocate
622 * page arrays for. The latter limit is set at just short of 32KB due
623 * to how the physical heap works.
624 */
625 struct page *apPagesStack[16];
626 struct page **papPages = &apPagesStack[0];
627 struct page **papPagesFree = NULL;
628 VBOXSFREADPGLSTREQ *pReq;
629 loff_t offFile = *off;
630 ssize_t cbRet = -ENOMEM;
631 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
632 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
633 bool fLockPgHack;
634
635 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
636 while (!pReq && cMaxPages > 4) {
637 cMaxPages /= 2;
638 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
639 }
640 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
641 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
642 if (pReq && papPages) {
643 cbRet = 0;
644 for (;;) {
645 /*
646 * Figure out how much to process now and lock the user pages.
647 */
648 int rc;
649 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
650 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
651 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
652 if (cPages <= cMaxPages)
653 cbChunk = size;
654 else {
655 cPages = cMaxPages;
656 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
657 }
658
659 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages, &fLockPgHack);
660 if (rc == 0) {
661 size_t iPage = cPages;
662 while (iPage-- > 0)
663 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
664 } else {
665 cbRet = rc;
666 break;
667 }
668
669 /*
670 * Issue the request and unlock the pages.
671 */
672 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
673
674 vbsf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/, fLockPgHack);
675
676 if (RT_SUCCESS(rc)) {
677 /*
678 * Success, advance position and buffer.
679 */
680 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
681 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
682 cbRet += cbActual;
683 offFile += cbActual;
684 buf = (uint8_t *)buf + cbActual;
685 size -= cbActual;
686
687 /*
688 * Are we done already? If so commit the new file offset.
689 */
690 if (!size || cbActual < cbChunk) {
691 *off = offFile;
692 break;
693 }
694 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
695 /*
696 * The host probably doesn't have enough heap to handle the
697 * request, reduce the page count and retry.
698 */
699 cMaxPages /= 4;
700 Assert(cMaxPages > 0);
701 } else {
702 /*
703 * If we've successfully read stuff, return it rather than
704 * the error. (Not sure if this is such a great idea...)
705 */
706 if (cbRet > 0)
707 *off = offFile;
708 else
709 cbRet = -EPROTO;
710 break;
711 }
712 }
713 }
714 if (papPagesFree)
715 kfree(papPages);
716 if (pReq)
717 VbglR0PhysHeapFree(pReq);
718 return cbRet;
719}
720
721
722/**
723 * Read from a regular file.
724 *
725 * @param file the file
726 * @param buf the buffer
727 * @param size length of the buffer
728 * @param off offset within the file (in/out).
729 * @returns The number of bytes read on success, negative Linux error code otherwise.
730 */
731static ssize_t vbsf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
732{
733 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
734 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
735 struct vbsf_reg_info *sf_r = file->private_data;
736 struct address_space *mapping = inode->i_mapping;
737
738 SFLOGFLOW(("vbsf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
739
740 if (!S_ISREG(inode->i_mode)) {
741 LogFunc(("read from non regular file %d\n", inode->i_mode));
742 return -EINVAL;
743 }
744
745 /** @todo XXX Check read permission according to inode->i_mode! */
746
747 if (!size)
748 return 0;
749
750 /*
751 * If there is a mapping and O_DIRECT isn't in effect, we must at a minimum
752 * heed dirty pages in the mapping and read from them. For simplicity
753 * though, we just do page cache reading when there are writable
754 * mappings around with any kind of pages loaded.
755 */
756 if (vbsf_should_use_cached_read(file, mapping, sf_g))
757 return vbsf_reg_read_mapped(file, buf, size, off);
758
759 /*
760 * For small requests, try to use an embedded buffer provided we get a heap block
761 * that does not cross page boundaries (see host code).
762 */
763 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
764 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size;
765 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
766 if (pReq) {
767 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
768 ssize_t cbRet;
769 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size);
770 if (RT_SUCCESS(vrc)) {
771 cbRet = pReq->Parms.cb32Read.u.value32;
772 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
773 if (copy_to_user(buf, pReq->abData, cbRet) == 0)
774 *off += cbRet;
775 else
776 cbRet = -EFAULT;
777 } else
778 cbRet = -EPROTO;
779 VbglR0PhysHeapFree(pReq);
780 return cbRet;
781 }
782 VbglR0PhysHeapFree(pReq);
783 }
784 }
785
786#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
787 /*
788 * For medium sized requests try use a bounce buffer.
789 */
790 if (size <= _64K /** @todo make this configurable? */) {
791 void *pvBounce = kmalloc(size, GFP_KERNEL);
792 if (pvBounce) {
793 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
794 if (pReq) {
795 ssize_t cbRet;
796 int vrc = VbglR0SfHostReqReadContig(sf_g->map.root, pReq, sf_r->Handle.hHost, *off,
797 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
798 if (RT_SUCCESS(vrc)) {
799 cbRet = pReq->Parms.cb32Read.u.value32;
800 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
801 if (copy_to_user(buf, pvBounce, cbRet) == 0)
802 *off += cbRet;
803 else
804 cbRet = -EFAULT;
805 } else
806 cbRet = -EPROTO;
807 VbglR0PhysHeapFree(pReq);
808 kfree(pvBounce);
809 return cbRet;
810 }
811 kfree(pvBounce);
812 }
813 }
814#endif
815
816 return vbsf_reg_read_locking(file, buf, size, off, sf_g, sf_r);
817}
818
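/*
 * Worked example for the embedded-request fast path in vbsf_reg_read() above,
 * assuming 4 KiB pages: PAGE_SIZE / 4 * 3 = 3072, so reads of up to
 * 3072 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) bytes may qualify.
 * The additional runtime check
 *
 *     (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq
 *
 * rejects heap blocks that straddle a page boundary, presumably because the
 * request is handed to the host by physical address and a single page
 * guarantees it is physically contiguous.
 */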
819
820/**
821 * Wrapper around invalidate_mapping_pages() for page cache invalidation so that
822 * the changes written via vbsf_reg_write are made visible to mmap users.
823 */
824DECLINLINE(void) vbsf_reg_write_invalidate_mapping_range(struct address_space *mapping, loff_t offStart, loff_t offEnd)
825{
826 /*
827 * Only bother with this if the mapping has any pages in it.
828 *
829 * Note! According to the docs, the last parameter, end, is inclusive (we
830 * would have named it 'last' to indicate this).
831 *
832 * Note! The pre-2.6.12 function might not do enough to ensure consistency
833 * when any of the pages in the range is already mapped.
834 */
835# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12)
836 if (mapping)
837 invalidate_inode_pages2_range(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
838# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 60)
839 if (mapping && mapping->nrpages > 0)
840 invalidate_mapping_pages(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
841# else
842 /** @todo ... */
843 RT_NOREF(mapping, offStart, offEnd);
844# endif
845}
846
847
848/**
849 * Fallback case of vbsf_reg_write() that locks the user buffers and lets the
850 * host read directly from them.
851 */
852static ssize_t vbsf_reg_write_locking(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile,
853 struct inode *inode, struct vbsf_inode_info *sf_i,
854 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
855{
856 /*
857 * Lock pages and execute the write, taking care not to pass the host
858 * more than it can handle in one go or more than we care to allocate
859 * page arrays for. The latter limit is set at just short of 32KB due
860 * to how the physical heap works.
861 */
862 struct page *apPagesStack[16];
863 struct page **papPages = &apPagesStack[0];
864 struct page **papPagesFree = NULL;
865 VBOXSFWRITEPGLSTREQ *pReq;
866 ssize_t cbRet = -ENOMEM;
867 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
868 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
869 bool fLockPgHack;
870
871 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
872 while (!pReq && cMaxPages > 4) {
873 cMaxPages /= 2;
874 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
875 }
876 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
877 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
878 if (pReq && papPages) {
879 cbRet = 0;
880 for (;;) {
881 /*
882 * Figure out how much to process now and lock the user pages.
883 */
884 int rc;
885 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
886 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
887 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
888 if (cPages <= cMaxPages)
889 cbChunk = size;
890 else {
891 cPages = cMaxPages;
892 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
893 }
894
895 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages, &fLockPgHack);
896 if (rc == 0) {
897 size_t iPage = cPages;
898 while (iPage-- > 0)
899 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
900 } else {
901 cbRet = rc;
902 break;
903 }
904
905 /*
906 * Issue the request and unlock the pages.
907 */
908 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
909
910 vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/, fLockPgHack);
911
912 if (RT_SUCCESS(rc)) {
913 /*
914 * Success, advance position and buffer.
915 */
916 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
917 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
918 cbRet += cbActual;
919 offFile += cbActual;
920 buf = (uint8_t *)buf + cbActual;
921 size -= cbActual;
922 if (offFile > i_size_read(inode))
923 i_size_write(inode, offFile);
924 vbsf_reg_write_invalidate_mapping_range(inode->i_mapping, offFile - cbActual, offFile);
925 sf_i->force_restat = 1; /* mtime (and size) may have changed */
926
927 /*
928 * Are we done already? If so commit the new file offset.
929 */
930 if (!size || cbActual < cbChunk) {
931 *off = offFile;
932 break;
933 }
934 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
935 /*
936 * The host probably doesn't have enough heap to handle the
937 * request, reduce the page count and retry.
938 */
939 cMaxPages /= 4;
940 Assert(cMaxPages > 0);
941 } else {
942 /*
943 * If we've successfully written stuff, return it rather than
944 * the error. (Not sure if this is such a great idea...)
945 */
946 if (cbRet > 0)
947 *off = offFile;
948 else
949 cbRet = -EPROTO;
950 break;
951 }
952 }
953 }
954 if (papPagesFree)
955 kfree(papPages);
956 if (pReq)
957 VbglR0PhysHeapFree(pReq);
958 return cbRet;
959}
960
961
962/**
963 * Write to a regular file.
964 *
965 * @param file the file
966 * @param buf the buffer
967 * @param size length of the buffer
968 * @param off offset within the file
969 * @returns The number of bytes written on success, negative Linux error code otherwise.
970 */
971static ssize_t vbsf_reg_write(struct file *file, const char *buf, size_t size, loff_t * off)
972{
973 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
974 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
975 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
976 struct vbsf_reg_info *sf_r = file->private_data;
977 struct address_space *mapping = inode->i_mapping;
978 loff_t pos;
979
980 SFLOGFLOW(("vbsf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
981 BUG_ON(!sf_i);
982 BUG_ON(!sf_g);
983 BUG_ON(!sf_r);
984 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
985
986 pos = *off;
987 /** @todo This should be handled by the host, it returning the new file
988 * offset when appending. We may have an outdated i_size value here! */
989 if (file->f_flags & O_APPEND)
990 pos = i_size_read(inode);
991
992 /** @todo XXX Check write permission according to inode->i_mode! */
993
994 if (!size) {
995 if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */
996 *off = pos;
997 return 0;
998 }
999
1000 /*
1001 * If there are active writable mappings, coordinate with any
1002 * pending writes via those.
1003 */
1004 if ( mapping
1005 && mapping->nrpages > 0
1006 && mapping_writably_mapped(mapping)) {
1007#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
1008 int err = filemap_fdatawait_range(mapping, pos, pos + size - 1);
1009 if (err)
1010 return err;
1011#else
1012 /** @todo ... */
1013#endif
1014 }
1015
1016 /*
1017 * For small requests, try to use an embedded buffer provided we get a heap block
1018 * that does not cross page boundaries (see host code).
1019 */
1020 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
1021 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size;
1022 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1023 if ( pReq
1024 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1025 ssize_t cbRet;
1026 if (copy_from_user(pReq->abData, buf, size) == 0) {
1027 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
1028 pos, (uint32_t)size);
1029 if (RT_SUCCESS(vrc)) {
1030 cbRet = pReq->Parms.cb32Write.u.value32;
1031 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
1032 pos += cbRet;
1033 *off = pos;
1034 if (pos > i_size_read(inode))
1035 i_size_write(inode, pos);
1036 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
1037 } else
1038 cbRet = -EPROTO;
1039 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1040 } else
1041 cbRet = -EFAULT;
1042
1043 VbglR0PhysHeapFree(pReq);
1044 return cbRet;
1045 }
1046 if (pReq)
1047 VbglR0PhysHeapFree(pReq);
1048 }
1049
1050#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
1051 /*
1052 * For medium sized requests try use a bounce buffer.
1053 */
1054 if (size <= _64K /** @todo make this configurable? */) {
1055 void *pvBounce = kmalloc(size, GFP_KERNEL);
1056 if (pvBounce) {
1057 if (copy_from_user(pvBounce, buf, size) == 0) {
1058 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1059 if (pReq) {
1060 ssize_t cbRet;
1061 int vrc = VbglR0SfHostReqWriteContig(sf_g->map.root, pReq, sf_r->handle, pos,
1062 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
1063 if (RT_SUCCESS(vrc)) {
1064 cbRet = pReq->Parms.cb32Write.u.value32;
1065 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
1066 pos += cbRet;
1067 *off = pos;
1068 if (pos > i_size_read(inode))
1069 i_size_write(inode, pos);
1070 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
1071 } else
1072 cbRet = -EPROTO;
1073 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1074 VbglR0PhysHeapFree(pReq);
1075 kfree(pvBounce);
1076 return cbRet;
1077 }
1078 kfree(pvBounce);
1079 } else {
1080 kfree(pvBounce);
1081 return -EFAULT;
1082 }
1083 }
1084 }
1085#endif
1086
1087 return vbsf_reg_write_locking(file, buf, size, off, pos, inode, sf_i, sf_g, sf_r);
1088}
1089
1090#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1091
1092/**
1093 * Companion to vbsf_iter_lock_pages().
1094 */
1095DECLINLINE(void) vbsf_iter_unlock_pages(struct iov_iter *iter, struct page **papPages, size_t cPages, bool fSetDirty)
1096{
1097 /* We don't mark kernel pages dirty: */
1098 if (iter->type & ITER_KVEC)
1099 fSetDirty = false;
1100
1101 while (cPages-- > 0)
1102 {
1103 struct page *pPage = papPages[cPages];
1104 if (fSetDirty && !PageReserved(pPage))
1105 SetPageDirty(pPage);
1106 vbsf_put_page(pPage);
1107 }
1108}
1109
1110
1111/**
1112 * Locks up to @a cMaxPages from the I/O vector iterator, advancing the
1113 * iterator.
1114 *
1115 * @returns 0 on success, negative errno value on failure.
1116 * @param iter The iterator to lock pages from.
1117 * @param fWrite Whether to write (true) or read (false) lock the pages.
1118 * @param pStash Where we stash peek results.
1119 * @param cMaxPages The maximum number of pages to get.
1120 * @param papPages Where to return the locked pages.
1121 * @param pcPages Where to return the number of pages.
1122 * @param poffPage0 Where to return the offset into the first page.
1123 * @param pcbChunk Where to return the number of bytes covered.
1124 */
1125static int vbsf_iter_lock_pages(struct iov_iter *iter, bool fWrite, struct vbsf_iter_stash *pStash, size_t cMaxPages,
1126 struct page **papPages, size_t *pcPages, size_t *poffPage0, size_t *pcbChunk)
1127{
1128 size_t cbChunk = 0;
1129 size_t cPages = 0;
1130 size_t offPage0 = 0;
1131 int rc = 0;
1132
1133 Assert(iov_iter_count(iter) + pStash->cb > 0);
1134 if (!(iter->type & ITER_KVEC)) {
1135 /*
1136 * Do we have a stashed page?
1137 */
1138 if (pStash->pPage) {
1139 papPages[0] = pStash->pPage;
1140 offPage0 = pStash->off;
1141 cbChunk = pStash->cb;
1142 cPages = 1;
1143 pStash->pPage = NULL;
1144 pStash->off = 0;
1145 pStash->cb = 0;
1146 if ( offPage0 + cbChunk < PAGE_SIZE
1147 || iov_iter_count(iter) == 0) {
1148 *poffPage0 = offPage0;
1149 *pcbChunk = cbChunk;
1150 *pcPages = cPages;
1151 SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx (stashed)\n",
1152 rc, cPages, offPage0, cbChunk));
1153 return 0;
1154 }
1155 cMaxPages -= 1;
1156 SFLOG3(("vbsf_iter_lock_pages: Picked up stashed page: %#zx LB %#zx\n", offPage0, cbChunk));
1157 } else {
1158# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
1159 /*
1160 * Copy out our starting point to assist rewinding.
1161 */
1162 pStash->offFromEnd = iov_iter_count(iter);
1163 pStash->Copy = *iter;
1164# endif
1165 }
1166
1167 /*
1168 * Get pages segment by segment.
1169 */
1170 do {
1171 /*
1172 * Make a special case of the first time thru here, since that's
1173 * the most typical scenario.
1174 */
1175 ssize_t cbSegRet;
1176 if (cPages == 0) {
1177 cbSegRet = iov_iter_get_pages(iter, papPages, iov_iter_count(iter), cMaxPages, &offPage0);
1178 if (cbSegRet > 0) {
1179 iov_iter_advance(iter, cbSegRet);
1180 cbChunk = (size_t)cbSegRet;
1181 cPages = RT_ALIGN_Z(offPage0 + cbSegRet, PAGE_SIZE) >> PAGE_SHIFT;
1182 cMaxPages -= cPages;
1183 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages -> %#zx @ %#zx; %#zx pages [first]\n", cbSegRet, offPage0, cPages));
1184 if ( cMaxPages == 0
1185 || ((offPage0 + (size_t)cbSegRet) & PAGE_OFFSET_MASK))
1186 break;
1187 } else {
1188 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1189 rc = (int)cbSegRet;
1190 break;
1191 }
1192 } else {
1193 /*
1194 * Probe first page of new segment to check that we've got a zero offset and
1195 * can continue on the current chunk. Stash the page if the offset isn't zero.
1196 */
1197 size_t offPgProbe;
1198 size_t cbSeg = iov_iter_single_seg_count(iter);
1199 while (!cbSeg) {
1200 iov_iter_advance(iter, 0);
1201 cbSeg = iov_iter_single_seg_count(iter);
1202 }
1203 cbSegRet = iov_iter_get_pages(iter, &papPages[cPages], iov_iter_count(iter), 1, &offPgProbe);
1204 if (cbSegRet > 0) {
1205 iov_iter_advance(iter, cbSegRet); /** @todo maybe not do this if we stash the page? */
1206 Assert(offPgProbe + cbSegRet <= PAGE_SIZE);
1207 if (offPgProbe == 0) {
1208 cbChunk += cbSegRet;
1209 cPages += 1;
1210 cMaxPages -= 1;
1211 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx\n", cbSegRet, offPgProbe));
1212 if ( cMaxPages == 0
1213 || cbSegRet != PAGE_SIZE)
1214 break;
1215
1216 /*
1217 * Get the rest of the segment (if anything remaining).
1218 */
1219 cbSeg -= cbSegRet;
1220 if (cbSeg > 0) {
1221 cbSegRet = iov_iter_get_pages(iter, &papPages[cPages], iov_iter_count(iter), cMaxPages, &offPgProbe);
1222 if (cbSegRet > 0) {
1223 size_t const cPgRet = RT_ALIGN_Z((size_t)cbSegRet, PAGE_SIZE) >> PAGE_SHIFT;
1224 Assert(offPgProbe == 0);
1225 iov_iter_advance(iter, cbSegRet);
1226 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages() -> %#zx; %#zx pages\n", cbSegRet, cPgRet));
1227 cPages += cPgRet;
1228 cMaxPages -= cPgRet;
1229 cbChunk += cbSegRet;
1230 if ( cMaxPages == 0
1231 || ((size_t)cbSegRet & PAGE_OFFSET_MASK))
1232 break;
1233 } else {
1234 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1235 rc = (int)cbSegRet;
1236 break;
1237 }
1238 }
1239 } else {
1240 /* The segment didn't start at a page boundary, so stash it for
1241 the next round: */
1242 SFLOGFLOW(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx; stashed\n", cbSegRet, offPgProbe));
1243 Assert(papPages[cPages]);
1244 pStash->pPage = papPages[cPages];
1245 pStash->off = offPgProbe;
1246 pStash->cb = cbSegRet;
1247 break;
1248 }
1249 } else {
1250 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1251 rc = (int)cbSegRet;
1252 break;
1253 }
1254 }
1255 Assert(cMaxPages > 0);
1256 } while (iov_iter_count(iter) > 0);
1257
1258 } else {
1259 /*
1260 * The silly iov_iter_get_pages_alloc() function doesn't handle KVECs,
1261 * so everyone needs to do that by themselves.
1262 *
1263 * Note! Fixes here may apply to rtR0MemObjNativeLockKernel()
1264 * and vbsf_lock_user_pages_failed_check_kernel() as well.
1265 */
1266# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
1267 pStash->offFromEnd = iov_iter_count(iter);
1268 pStash->Copy = *iter;
1269# endif
1270 do {
1271 uint8_t *pbBuf;
1272 size_t offStart;
1273 size_t cPgSeg;
1274
1275 size_t cbSeg = iov_iter_single_seg_count(iter);
1276 while (!cbSeg) {
1277 iov_iter_advance(iter, 0);
1278 cbSeg = iov_iter_single_seg_count(iter);
1279 }
1280
1281# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
1282 pbBuf = iter->kvec->iov_base + iter->iov_offset;
1283# else
1284 pbBuf = iter->iov->iov_base + iter->iov_offset;
1285# endif
1286 offStart = (uintptr_t)pbBuf & PAGE_OFFSET_MASK;
1287 if (!cPages)
1288 offPage0 = offStart;
1289 else if (offStart)
1290 break;
1291
1292 cPgSeg = RT_ALIGN_Z(cbSeg, PAGE_SIZE) >> PAGE_SHIFT;
1293 if (cPgSeg > cMaxPages) {
1294 cPgSeg = cMaxPages;
1295 cbSeg = (cPgSeg << PAGE_SHIFT) - offStart;
1296 }
1297
1298 rc = vbsf_lock_kernel_pages(pbBuf, fWrite, cPgSeg, &papPages[cPages]);
1299 if (rc == 0) {
1300 iov_iter_advance(iter, cbSeg);
1301 cbChunk += cbSeg;
1302 cPages += cPgSeg;
1303 cMaxPages -= cPgSeg;
1304 if ( cMaxPages == 0
1305 || ((offStart + cbSeg) & PAGE_OFFSET_MASK) != 0)
1306 break;
1307 } else
1308 break;
1309 } while (iov_iter_count(iter) > 0);
1310 }
1311
1312 /*
1313 * Clean up if we failed; set return values.
1314 */
1315 if (rc == 0) {
1316 /* likely */
1317 } else {
1318 if (cPages > 0)
1319 vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
1320 offPage0 = cbChunk = cPages = 0;
1321 }
1322 *poffPage0 = offPage0;
1323 *pcbChunk = cbChunk;
1324 *pcPages = cPages;
1325 SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx\n", rc, cPages, offPage0, cbChunk));
1326 return rc;
1327}
1328
1329
1330/**
1331 * Rewinds the I/O vector.
1332 */
1333static bool vbsf_iter_rewind(struct iov_iter *iter, struct vbsf_iter_stash *pStash, size_t cbToRewind, size_t cbChunk)
1334{
1335 size_t cbExtra;
1336 if (!pStash->pPage) {
1337 cbExtra = 0;
1338 } else {
1339 cbExtra = pStash->cb;
1340 vbsf_put_page(pStash->pPage);
1341 pStash->pPage = NULL;
1342 pStash->cb = 0;
1343 pStash->off = 0;
1344 }
1345
1346# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
1347 iov_iter_revert(iter, cbToRewind + cbExtra);
1348 return true;
1349# else
1350 /** @todo impl this */
1351 return false;
1352# endif
1353}
1354
1355
1356/**
1357 * Cleans up the page locking stash.
1358 */
1359DECLINLINE(void) vbsf_iter_cleanup_stash(struct iov_iter *iter, struct vbsf_iter_stash *pStash)
1360{
1361 if (pStash->pPage)
1362 vbsf_iter_rewind(iter, pStash, 0, 0);
1363}
1364
1365
1366/**
1367 * Calculates the longest span of pages we could transfer to the host in a
1368 * single request.
1369 *
1370 * @returns Page count, non-zero.
1371 * @param iter The I/O vector iterator to inspect.
1372 */
1373static size_t vbsf_iter_max_span_of_pages(struct iov_iter *iter)
1374{
1375 size_t cPages;
1376 if (iter_is_iovec(iter) || (iter->type & ITER_KVEC)) {
1377 const struct iovec *pCurIov = iter->iov;
1378 size_t cLeft = iter->nr_segs;
1379 size_t cPagesSpan = 0;
1380
1381 /* iovec and kvec are identical, except for the __user tagging of iov_base. */
1382 AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, struct kvec, iov_base);
1383 AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, struct kvec, iov_len);
1384 AssertCompile(sizeof(struct iovec) == sizeof(struct kvec));
1385
1386 cPages = 1;
1387 AssertReturn(cLeft > 0, cPages);
1388
1389 /* Special case: segment offset. */
1390 if (iter->iov_offset > 0) {
1391 if (iter->iov_offset < pCurIov->iov_len) {
1392 size_t const cbSegLeft = pCurIov->iov_len - iter->iov_offset;
1393 size_t const offPage0 = ((uintptr_t)pCurIov->iov_base + iter->iov_offset) & PAGE_OFFSET_MASK;
1394 cPages = cPagesSpan = RT_ALIGN_Z(offPage0 + cbSegLeft, PAGE_SIZE) >> PAGE_SHIFT;
1395 if ((offPage0 + cbSegLeft) & PAGE_OFFSET_MASK)
1396 cPagesSpan = 0;
1397 }
1398 SFLOGFLOW(("vbsf_iter: seg[0]= %p LB %#zx\n", pCurIov->iov_base, pCurIov->iov_len));
1399 pCurIov++;
1400 cLeft--;
1401 }
1402
1403 /* Full segments. */
1404 while (cLeft-- > 0) {
1405 if (pCurIov->iov_len > 0) {
1406 size_t const offPage0 = (uintptr_t)pCurIov->iov_base & PAGE_OFFSET_MASK;
1407 if (offPage0 == 0) {
1408 if (!(pCurIov->iov_len & PAGE_OFFSET_MASK)) {
1409 cPagesSpan += pCurIov->iov_len >> PAGE_SHIFT;
1410 } else {
1411 cPagesSpan += RT_ALIGN_Z(pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT;
1412 if (cPagesSpan > cPages)
1413 cPages = cPagesSpan;
1414 cPagesSpan = 0;
1415 }
1416 } else {
1417 if (cPagesSpan > cPages)
1418 cPages = cPagesSpan;
1419 if (!((offPage0 + pCurIov->iov_len) & PAGE_OFFSET_MASK)) {
1420 cPagesSpan = pCurIov->iov_len >> PAGE_SHIFT;
1421 } else {
1422 cPagesSpan += RT_ALIGN_Z(offPage0 + pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT;
1423 if (cPagesSpan > cPages)
1424 cPages = cPagesSpan;
1425 cPagesSpan = 0;
1426 }
1427 }
1428 }
1429 SFLOGFLOW(("vbsf_iter: seg[%u]= %p LB %#zx\n", iter->nr_segs - cLeft, pCurIov->iov_base, pCurIov->iov_len));
1430 pCurIov++;
1431 }
1432 if (cPagesSpan > cPages)
1433 cPages = cPagesSpan;
1434 } else {
1435 /* Won't bother with accurate counts for the next two types, just make
1436 some rough estimates (do pipes have segments?): */
1437 size_t cSegs = iter->type & ITER_BVEC ? RT_MAX(1, iter->nr_segs) : 1;
1438 cPages = (iov_iter_count(iter) + (PAGE_SIZE * 2 - 2) * cSegs) >> PAGE_SHIFT;
1439 }
1440 SFLOGFLOW(("vbsf_iter_max_span_of_pages: returns %#zx\n", cPages));
1441 return cPages;
1442}
1443
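/*
 * Two small worked examples for vbsf_iter_max_span_of_pages() above (iovec
 * case, 4 KiB pages, iov_offset == 0):
 *
 *  - A single 16 KiB segment starting 0x300 bytes into a page touches
 *    RT_ALIGN_Z(0x300 + 0x4000, PAGE_SIZE) >> PAGE_SHIFT = 5 pages, so the
 *    function returns 5.
 *
 *  - Two fully page-aligned segments of 2 and 3 pages form one contiguous
 *    boundary-to-boundary span, so the function also returns 5, allowing both
 *    segments to go to the host in a single request.
 */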
1444
1445/**
1446 * Worker for vbsf_reg_read_iter() that deals with larger reads using page
1447 * locking.
1448 */
1449static ssize_t vbsf_reg_read_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToRead,
1450 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
1451{
1452 /*
1453 * Estimate how many pages we may possibly submit in a single request so
1454 * that we can allocate a matching request buffer and page array.
1455 */
1456 struct page *apPagesStack[16];
1457 struct page **papPages = &apPagesStack[0];
1458 struct page **papPagesFree = NULL;
1459 VBOXSFREADPGLSTREQ *pReq;
1460 ssize_t cbRet = 0;
1461 size_t cMaxPages = vbsf_iter_max_span_of_pages(iter);
1462 cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 2), cMaxPages);
1463
1464 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
1465 while (!pReq && cMaxPages > 4) {
1466 cMaxPages /= 2;
1467 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
1468 }
1469 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
1470 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
1471 if (pReq && papPages) {
1472
1473 /*
1474 * The read loop.
1475 */
1476 struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
1477 do {
1478 /*
1479 * Grab as many pages as we can. This means that if adjacent
1480 * segments both start and end at a page boundary, we can
1481 * do them both in the same transfer from the host.
1482 */
1483 size_t cPages = 0;
1484 size_t cbChunk = 0;
1485 size_t offPage0 = 0;
1486 int rc = vbsf_iter_lock_pages(iter, true /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk);
1487 if (rc == 0) {
1488 size_t iPage = cPages;
1489 while (iPage-- > 0)
1490 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
1491 pReq->PgLst.offFirstPage = (uint16_t)offPage0;
1492 AssertStmt(cbChunk <= cbToRead, cbChunk = cbToRead);
1493 } else {
1494 cbRet = rc;
1495 break;
1496 }
1497
1498 /*
1499 * Issue the request and unlock the pages.
1500 */
1501 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, kio->ki_pos, cbChunk, cPages);
1502 SFLOGFLOW(("vbsf_reg_read_iter_locking: VbglR0SfHostReqReadPgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x\n",
1503 rc, pReq->Parms.cb32Read.u.value32, cbChunk, cbToRead, cPages, offPage0));
1504
1505 vbsf_iter_unlock_pages(iter, papPages, cPages, true /*fSetDirty*/);
1506
1507 if (RT_SUCCESS(rc)) {
1508 /*
1509 * Success, advance position and buffer.
1510 */
1511 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
1512 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
1513 cbRet += cbActual;
1514 kio->ki_pos += cbActual;
1515 cbToRead -= cbActual;
1516
1517 /*
1518 * Are we done already?
1519 */
1520 if (!cbToRead)
1521 break;
1522 if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */
1523 if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual))
1524 iov_iter_truncate(iter, 0);
1525 break;
1526 }
1527 } else {
1528 /*
1529 * Try to rewind the iter structure.
1530 */
1531 bool const fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk);
1532 if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) {
1533 /*
1534 * The host probably doesn't have enough heap to handle the
1535 * request, reduce the page count and retry.
1536 */
1537 cMaxPages /= 4;
1538 Assert(cMaxPages > 0);
1539 } else {
1540 /*
1541 * If we've successfully read stuff, return it rather than
1542 * the error. (Not sure if this is such a great idea...)
1543 */
1544 if (cbRet <= 0)
1545 cbRet = -EPROTO;
1546 break;
1547 }
1548 }
1549 } while (cbToRead > 0);
1550
1551 vbsf_iter_cleanup_stash(iter, &Stash);
1552 }
1553 else
1554 cbRet = -ENOMEM;
1555 if (papPagesFree)
1556 kfree(papPages);
1557 if (pReq)
1558 VbglR0PhysHeapFree(pReq);
1559 SFLOGFLOW(("vbsf_reg_read_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet));
1560 return cbRet;
1561}
1562
1563
1564/**
1565 * Read into I/O vector iterator.
1566 *
1567 * @returns Number of bytes read on success, negative errno on error.
1568 * @param kio The kernel I/O control block (or something like that).
1569 * @param iter The I/O vector iterator describing the buffer.
1570 */
1571static ssize_t vbsf_reg_read_iter(struct kiocb *kio, struct iov_iter *iter)
1572{
1573 size_t cbToRead = iov_iter_count(iter);
1574 struct inode *inode = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
1575 struct address_space *mapping = inode->i_mapping;
1576
1577 struct vbsf_reg_info *sf_r = kio->ki_filp->private_data;
1578 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1579
1580 SFLOGFLOW(("vbsf_reg_read_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
1581 inode, kio->ki_filp, cbToRead, kio->ki_pos, iter->type));
1582 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
1583
1584 /*
1585 * Do we have anything at all to do here?
1586 */
1587 if (!cbToRead)
1588 return 0;
1589
1590 /*
1591 * If there is a mapping and O_DIRECT isn't in effect, we must at a minimum
1592 * heed dirty pages in the mapping and read from them. For simplicity
1593 * though, we just do page cache reading when there are writable
1594 * mappings around with any kind of pages loaded.
1595 */
1596 if (vbsf_should_use_cached_read(kio->ki_filp, mapping, sf_g))
1597 return generic_file_read_iter(kio, iter);
1598
1599 /*
1600 * For now we reject async I/O requests.
1601 */
1602 if (!is_sync_kiocb(kio)) {
1603 SFLOGFLOW(("vbsf_reg_read_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
1604 return -EOPNOTSUPP;
1605 }
1606
1607 /*
1608 * For small requests, try to use an embedded buffer provided we get a heap block
1609 * that does not cross page boundaries (see host code).
1610 */
1611 if (cbToRead <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
1612 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + cbToRead;
1613 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1614 if (pReq) {
1615 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1616 ssize_t cbRet;
1617 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, kio->ki_pos, (uint32_t)cbToRead);
1618 if (RT_SUCCESS(vrc)) {
1619 cbRet = pReq->Parms.cb32Read.u.value32;
1620 AssertStmt(cbRet <= (ssize_t)cbToRead, cbRet = cbToRead);
1621 if (copy_to_iter(pReq->abData, cbRet, iter) == cbRet) {
1622 kio->ki_pos += cbRet;
1623 if (cbRet < cbToRead)
1624 iov_iter_truncate(iter, 0);
1625 } else
1626 cbRet = -EFAULT;
1627 } else
1628 cbRet = -EPROTO;
1629 VbglR0PhysHeapFree(pReq);
1630 SFLOGFLOW(("vbsf_reg_read_iter: returns %#zx (%zd)\n", cbRet, cbRet));
1631 return cbRet;
1632 }
1633 VbglR0PhysHeapFree(pReq);
1634 }
1635 }
1636
1637 /*
1638 * Otherwise do the page locking thing.
1639 */
1640 return vbsf_reg_read_iter_locking(kio, iter, cbToRead, sf_g, sf_r);
1641}
1642
1643
1644/**
1645 * Worker for vbsf_reg_write_iter() that deals with larger writes using page
1646 * locking.
1647 */
1648static ssize_t vbsf_reg_write_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToWrite, loff_t offFile,
1649 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
1650 struct inode *inode, struct vbsf_inode_info *sf_i, struct address_space *mapping)
1651{
1652 /*
1653 * Estimate how many pages we may possibly submit in a single request so
1654 * that we can allocate a matching request buffer and page array.
1655 */
1656 struct page *apPagesStack[16];
1657 struct page **papPages = &apPagesStack[0];
1658 struct page **papPagesFree = NULL;
1659 VBOXSFWRITEPGLSTREQ *pReq;
1660 ssize_t cbRet = 0;
1661 size_t cMaxPages = vbsf_iter_max_span_of_pages(iter);
1662 cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 2), cMaxPages);
1663
1664 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
1665 while (!pReq && cMaxPages > 4) {
1666 cMaxPages /= 2;
1667 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
1668 }
1669 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
1670 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
1671 if (pReq && papPages) {
1672
1673 /*
1674 * The write loop.
1675 */
1676 struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
1677 do {
1678 /*
1679 * Grab as many pages as we can. This means that if adjacent
1680 * segments both start and end at a page boundary, we can
1681 * do them both in the same transfer from the host.
1682 */
1683 size_t cPages = 0;
1684 size_t cbChunk = 0;
1685 size_t offPage0 = 0;
1686 int rc = vbsf_iter_lock_pages(iter, false /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk);
1687 if (rc == 0) {
1688 size_t iPage = cPages;
1689 while (iPage-- > 0)
1690 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
1691 pReq->PgLst.offFirstPage = (uint16_t)offPage0;
1692 AssertStmt(cbChunk <= cbToWrite, cbChunk = cbToWrite);
1693 } else {
1694 cbRet = rc;
1695 break;
1696 }
1697
1698 /*
1699 * Issue the request and unlock the pages.
1700 */
1701 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
1702            SFLOGFLOW(("vbsf_reg_write_iter_locking: VbglR0SfHostReqWritePgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x)\n",
1703 rc, pReq->Parms.cb32Write.u.value32, cbChunk, cbToWrite, cPages, offPage0));
1704
1705 vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
1706
1707 if (RT_SUCCESS(rc)) {
1708 /*
1709 * Success, advance position and buffer.
1710 */
1711 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
1712 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
1713 cbRet += cbActual;
1714 offFile += cbActual;
1715 kio->ki_pos = offFile;
1716 cbToWrite -= cbActual;
1717 if (offFile > i_size_read(inode))
1718 i_size_write(inode, offFile);
1719 vbsf_reg_write_invalidate_mapping_range(mapping, offFile - cbActual, offFile);
1720 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1721
1722 /*
1723 * Are we done already?
1724 */
1725 if (!cbToWrite)
1726 break;
1727 if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */
1728 if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual))
1729 iov_iter_truncate(iter, 0);
1730 break;
1731 }
1732 } else {
1733 /*
1734                 * Try to rewind the iter structure.
1735 */
1736 bool const fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk);
1737 if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) {
1738 /*
1739 * The host probably doesn't have enough heap to handle the
1740 * request, reduce the page count and retry.
1741 */
1742 cMaxPages /= 4;
1743 Assert(cMaxPages > 0);
1744 } else {
1745 /*
1746 * If we've successfully written stuff, return it rather than
1747 * the error. (Not sure if this is such a great idea...)
1748 */
1749 if (cbRet <= 0)
1750 cbRet = -EPROTO;
1751 break;
1752 }
1753 }
1754 } while (cbToWrite > 0);
1755
1756 vbsf_iter_cleanup_stash(iter, &Stash);
1757 }
1758 else
1759 cbRet = -ENOMEM;
1760 if (papPagesFree)
1761 kfree(papPages);
1762 if (pReq)
1763 VbglR0PhysHeapFree(pReq);
1764 SFLOGFLOW(("vbsf_reg_write_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet));
1765 return cbRet;
1766}
1767
1768
1769
1770/**
1771 * Write from I/O vector iterator.
1772 *
1773 * @returns Number of bytes written on success, negative errno on error.
1774 * @param   kio         The kernel I/O control block.
1775 * @param iter The I/O vector iterator describing the buffer.
1776 */
1777static ssize_t vbsf_reg_write_iter(struct kiocb *kio, struct iov_iter *iter)
1778{
1779 size_t cbToWrite = iov_iter_count(iter);
1780 struct inode *inode = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
1781 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1782 struct address_space *mapping = inode->i_mapping;
1783
1784 struct vbsf_reg_info *sf_r = kio->ki_filp->private_data;
1785 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1786 loff_t offFile = kio->ki_pos;
1787
1788 SFLOGFLOW(("vbsf_reg_write_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
1789 inode, kio->ki_filp, cbToWrite, offFile, iter->type));
1790 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
1791
1792 /*
1793 * Enforce APPEND flag.
1794 */
1795 /** @todo This should be handled by the host, it returning the new file
1796 * offset when appending. We may have an outdated i_size value here! */
1797 if (kio->ki_flags & IOCB_APPEND)
1798 kio->ki_pos = offFile = i_size_read(inode);
1799
1800 /*
1801 * Do we have anything at all to do here?
1802 */
1803 if (!cbToWrite)
1804 return 0;
1805
1806 /*
1807     * For now we reject async I/O requests.
1808 */
1809 if (!is_sync_kiocb(kio)) {
1810 SFLOGFLOW(("vbsf_reg_write_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
1811 return -EOPNOTSUPP;
1812 }
1813
1814 /*
1815 * If there are active writable mappings, coordinate with any
1816 * pending writes via those.
1817 */
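    /* filemap_fdatawait_range() only waits for writeback that is already in
       flight for the affected byte range; the point is to not race our direct
       host write against page cache writeback of the same bytes coming from a
       writable mapping. */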
1818 if ( mapping
1819 && mapping->nrpages > 0
1820 && mapping_writably_mapped(mapping)) {
1821#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
1822 int err = filemap_fdatawait_range(mapping, offFile, offFile + cbToWrite - 1);
1823 if (err)
1824 return err;
1825#else
1826 /** @todo ... */
1827#endif
1828 }
1829
1830 /*
1831     * For small requests, try to use an embedded buffer provided we get a heap block
1832     * that does not cross page boundaries (see host code).
1833 */
1834 if (cbToWrite <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
1835 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + cbToWrite;
1836 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1837 if (pReq) {
1838 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1839 ssize_t cbRet;
1840 if (copy_from_iter(pReq->abData, cbToWrite, iter) == cbToWrite) {
1841 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
1842 offFile, (uint32_t)cbToWrite);
1843 if (RT_SUCCESS(vrc)) {
1844 cbRet = pReq->Parms.cb32Write.u.value32;
1845 AssertStmt(cbRet <= (ssize_t)cbToWrite, cbRet = cbToWrite);
1846 kio->ki_pos = offFile += cbRet;
1847 if (offFile > i_size_read(inode))
1848 i_size_write(inode, offFile);
1849 vbsf_reg_write_invalidate_mapping_range(mapping, offFile - cbRet, offFile);
1850# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
1851 if ((size_t)cbRet < cbToWrite)
1852 iov_iter_revert(iter, cbToWrite - cbRet);
1853# endif
1854 } else
1855 cbRet = -EPROTO;
1856 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1857 } else
1858 cbRet = -EFAULT;
1859 VbglR0PhysHeapFree(pReq);
1860 SFLOGFLOW(("vbsf_reg_write_iter: returns %#zx (%zd)\n", cbRet, cbRet));
1861 return cbRet;
1862 }
1863 VbglR0PhysHeapFree(pReq);
1864 }
1865 }
1866
1867 /*
1868 * Otherwise do the page locking thing.
1869 */
1870 return vbsf_reg_write_iter_locking(kio, iter, cbToWrite, offFile, sf_g, sf_r, inode, sf_i, mapping);
1871}
1872
1873#endif /* >= 3.16.0 */
1874
1875/**
1876 * Open a regular file.
1877 *
1878 * @param inode the inode
1879 * @param file the file
1880 * @returns 0 on success, Linux error code otherwise
1881 */
1882static int vbsf_reg_open(struct inode *inode, struct file *file)
1883{
1884 int rc, rc_linux = 0;
1885 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1886 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1887 struct vbsf_reg_info *sf_r;
1888 struct dentry *dentry = VBSF_GET_F_DENTRY(file);
1889 VBOXSFCREATEREQ *pReq;
1890
1891 SFLOGFLOW(("vbsf_reg_open: inode=%p file=%p flags=%#x %s\n", inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
1892 BUG_ON(!sf_g);
1893 BUG_ON(!sf_i);
1894
1895 sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
1896 if (!sf_r) {
1897 LogRelFunc(("could not allocate reg info\n"));
1898 return -ENOMEM;
1899 }
1900
1901 RTListInit(&sf_r->Handle.Entry);
1902 sf_r->Handle.cRefs = 1;
1903 sf_r->Handle.fFlags = VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC;
1904 sf_r->Handle.hHost = SHFL_HANDLE_NIL;
1905
1906 /* Already open? */
1907 if (sf_i->handle != SHFL_HANDLE_NIL) {
1908 /*
1909 * This inode was created with vbsf_create_worker(). Check the CreateFlags:
1910         * O_CREAT, O_TRUNC: inherently true (file was just created). Not sure
1911 * about the access flags (SHFL_CF_ACCESS_*).
1912 */
1913 sf_i->force_restat = 1;
1914 sf_r->Handle.hHost = sf_i->handle;
1915 sf_i->handle = SHFL_HANDLE_NIL;
1916 file->private_data = sf_r;
1917
1918 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE; /** @todo fix */
1919 vbsf_handle_append(sf_i, &sf_r->Handle);
1920 SFLOGFLOW(("vbsf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
1921 return 0;
1922 }
1923
1924 pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
1925 if (!pReq) {
1926 kfree(sf_r);
1927 LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
1928 return -ENOMEM;
1929 }
1930 memcpy(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
1931 RT_ZERO(pReq->CreateParms);
1932 pReq->CreateParms.Handle = SHFL_HANDLE_NIL;
1933
1934 /* We check the value of pReq->CreateParms.Handle afterwards to
1935 * find out if the call succeeded or failed, as the API does not seem
1936 * to cleanly distinguish error and informational messages.
1937 *
1938 * Furthermore, we must set pReq->CreateParms.Handle to SHFL_HANDLE_NIL
1939     * to make the shared folders host service use our fMode parameter. */
1940
1941 if (file->f_flags & O_CREAT) {
1942 LogFunc(("O_CREAT set\n"));
1943 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_CREATE_IF_NEW;
1944 /* We ignore O_EXCL, as the Linux kernel seems to call create
1945 beforehand itself, so O_EXCL should always fail. */
1946 if (file->f_flags & O_TRUNC) {
1947 LogFunc(("O_TRUNC set\n"));
1948 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1949 } else
1950 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
1951 } else {
1952 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_FAIL_IF_NEW;
1953 if (file->f_flags & O_TRUNC) {
1954 LogFunc(("O_TRUNC set\n"));
1955 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1956 }
1957 }
1958
1959 switch (file->f_flags & O_ACCMODE) {
1960 case O_RDONLY:
1961 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_READ;
1962 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ;
1963 break;
1964
1965 case O_WRONLY:
1966 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_WRITE;
1967 sf_r->Handle.fFlags |= VBSF_HANDLE_F_WRITE;
1968 break;
1969
1970 case O_RDWR:
1971 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_READWRITE;
1972 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE;
1973 break;
1974
1975 default:
1976 BUG();
1977 }
1978
1979 if (file->f_flags & O_APPEND) {
1980 LogFunc(("O_APPEND set\n"));
1981 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_APPEND;
1982 sf_r->Handle.fFlags |= VBSF_HANDLE_F_APPEND;
1983 }
1984
1985 pReq->CreateParms.Info.Attr.fMode = inode->i_mode;
1986 LogFunc(("vbsf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n",
1987 sf_i->path->String.utf8, file->f_flags, pReq->CreateParms.CreateFlags));
1988 rc = VbglR0SfHostReqCreate(sf_g->map.root, pReq);
1989 if (RT_FAILURE(rc)) {
1990 LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pReq->CreateParms.CreateFlags, rc));
1991 kfree(sf_r);
1992 VbglR0PhysHeapFree(pReq);
1993 return -RTErrConvertToErrno(rc);
1994 }
1995
1996 if (pReq->CreateParms.Handle != SHFL_HANDLE_NIL) {
1997 vbsf_dentry_chain_increase_ttl(dentry);
1998 rc_linux = 0;
1999 } else {
2000 switch (pReq->CreateParms.Result) {
2001 case SHFL_PATH_NOT_FOUND:
2002 rc_linux = -ENOENT;
2003 break;
2004 case SHFL_FILE_NOT_FOUND:
2005 /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
2006 rc_linux = -ENOENT;
2007 break;
2008 case SHFL_FILE_EXISTS:
2009 vbsf_dentry_chain_increase_ttl(dentry);
2010 rc_linux = -EEXIST;
2011 break;
2012 default:
2013 vbsf_dentry_chain_increase_parent_ttl(dentry);
2014 rc_linux = 0;
2015 break;
2016 }
2017 }
2018
2019 sf_i->force_restat = 1; /** @todo Why?!? */
2020 sf_r->Handle.hHost = pReq->CreateParms.Handle;
2021 file->private_data = sf_r;
2022 vbsf_handle_append(sf_i, &sf_r->Handle);
2023 VbglR0PhysHeapFree(pReq);
2024 SFLOGFLOW(("vbsf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
2025 return rc_linux;
2026}
2027
2028
2029/**
2030 * Close a regular file.
2031 *
2032 * @param inode the inode
2033 * @param file the file
2034 * @returns 0 on success, Linux error code otherwise
2035 */
2036static int vbsf_reg_release(struct inode *inode, struct file *file)
2037{
2038 struct vbsf_reg_info *sf_r;
2039 struct vbsf_super_info *sf_g;
2040 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
2041
2042 SFLOGFLOW(("vbsf_reg_release: inode=%p file=%p\n", inode, file));
2043 sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2044 sf_r = file->private_data;
2045
2046 BUG_ON(!sf_g);
2047 BUG_ON(!sf_r);
2048
2049#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 25)
2050 /* See the smbfs source (file.c). mmap in particular can cause data to be
2051 * written to the file after it is closed, which we can't cope with. We
2052 * copy and paste the body of filemap_write_and_wait() here as it was not
2053 * defined before 2.6.6 and not exported until quite a bit later. */
2054 /* filemap_write_and_wait(inode->i_mapping); */
2055 if (inode->i_mapping->nrpages
2056 && filemap_fdatawrite(inode->i_mapping) != -EIO)
2057 filemap_fdatawait(inode->i_mapping);
2058#endif
2059
2060 /* Release sf_r, closing the handle if we're the last user. */
2061 file->private_data = NULL;
2062 vbsf_handle_release(&sf_r->Handle, sf_g, "vbsf_reg_release");
2063
2064 sf_i->handle = SHFL_HANDLE_NIL;
2065 return 0;
2066}
2067
2068/**
2069 * Wrapper around generic/default seek function that ensures that we've got
2070 * the up-to-date file size when doing anything relative to EOF.
2071 *
2072 * The issue is that the host may extend the file while we weren't looking and
2073 * if the caller wishes to append data, it may end up overwriting existing data
2074 * if we operate with a stale size. So, we always retrieve the file size on EOF
2075 * relative seeks.
2076 */
2077static loff_t vbsf_reg_llseek(struct file *file, loff_t off, int whence)
2078{
2079 SFLOGFLOW(("vbsf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
2080
2081 switch (whence) {
2082#ifdef SEEK_HOLE
2083 case SEEK_HOLE:
2084 case SEEK_DATA:
2085#endif
2086 case SEEK_END: {
2087 struct vbsf_reg_info *sf_r = file->private_data;
2088 int rc = vbsf_inode_revalidate_with_handle(VBSF_GET_F_DENTRY(file), sf_r->Handle.hHost,
2089 true /*fForce*/, false /*fInodeLocked*/);
2090 if (rc == 0)
2091 break;
2092 return rc;
2093 }
2094 }
2095
2096#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8)
2097 return generic_file_llseek(file, off, whence);
2098#else
2099 return default_llseek(file, off, whence);
2100#endif
2101}
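/*
 * Illustrative user-space sketch (not part of the driver) of the hazard the
 * revalidation above guards against; with a stale size an EOF-relative seek
 * would position the file short of the data the host has appended meanwhile:
 *
 *      lseek(fd, 0, SEEK_END);     // must reflect the host's current size...
 *      write(fd, buf, cb);         // ...or this overwrites host-written data
 */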
2102
2103/**
2104 * Flush region of file - chiefly mmap/msync.
2105 *
2106 * We cannot use the noop_fsync / simple_sync_file here as that means
2107 * msync(,,MS_SYNC) will return before the data hits the host, thereby
2108 * causing coherency issues with O_DIRECT access to the same file as
2109 * well as any host interaction with the file.
2110 */
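/*
 * Illustrative sketch (not part of the driver): the guarantee callers rely on
 * is that after
 *      msync(pvMapped, cb, MS_SYNC);
 * the data is observable on the host side, which is why a no-op fsync here
 * would break O_DIRECT readers and host-side consumers of the same file.
 */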
2111#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
2112static int vbsf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2113{
2114# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2115 return __generic_file_fsync(file, start, end, datasync);
2116# else
2117 return generic_file_fsync(file, start, end, datasync);
2118# endif
2119}
2120#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
2121static int vbsf_reg_fsync(struct file *file, int datasync)
2122{
2123 return generic_file_fsync(file, datasync);
2124}
2125#else /* < 2.6.35 */
2126static int vbsf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
2127{
2128# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
2129 return simple_fsync(file, dentry, datasync);
2130# else
2131 int rc;
2132 struct inode *inode = dentry->d_inode;
2133 AssertReturn(inode, -EINVAL);
2134
2135 /** @todo What about file_fsync()? (<= 2.5.11) */
2136
2137# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
2138 rc = sync_mapping_buffers(inode->i_mapping);
2139 if ( rc == 0
2140 && (inode->i_state & I_DIRTY)
2141 && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
2142 ) {
2143 struct writeback_control wbc = {
2144 .sync_mode = WB_SYNC_ALL,
2145 .nr_to_write = 0
2146 };
2147 rc = sync_inode(inode, &wbc);
2148 }
2149# else /* < 2.5.12 */
2150 rc = fsync_inode_buffers(inode);
2151# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2152 rc |= fsync_inode_data_buffers(inode);
2153# endif
2154 /** @todo probably need to do more here... */
2155# endif /* < 2.5.12 */
2156 return rc;
2157# endif
2158}
2159#endif /* < 2.6.35 */
2160
2161
2162/**
2163 * File operations for regular files.
2164 */
2165struct file_operations vbsf_reg_fops = {
2166 .open = vbsf_reg_open,
2167 .read = vbsf_reg_read,
2168 .write = vbsf_reg_write,
2169#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2170 .read_iter = vbsf_reg_read_iter,
2171 .write_iter = vbsf_reg_write_iter,
2172#endif
2173 .release = vbsf_reg_release,
2174 .mmap = generic_file_mmap,
2175#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
2176# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
2177/** @todo This code is known to cause caching of data which should not be
2178 * cached. Investigate. */
2179# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
2180 .splice_read = vbsf_splice_read,
2181# else
2182 .sendfile = generic_file_sendfile,
2183# endif
2184 .aio_read = generic_file_aio_read,
2185 .aio_write = generic_file_aio_write,
2186# endif
2187#endif
2188 .llseek = vbsf_reg_llseek,
2189 .fsync = vbsf_reg_fsync,
2190};
2191
2192struct inode_operations vbsf_reg_iops = {
2193#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 18)
2194 .getattr = vbsf_inode_getattr,
2195#else
2196 .revalidate = vbsf_inode_revalidate,
2197#endif
2198 .setattr = vbsf_inode_setattr,
2199};
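/*
 * Illustrative sketch only: these tables are hooked up when a regular file
 * inode is initialized elsewhere in this module, roughly along the lines of
 *
 *      inode->i_op             = &vbsf_reg_iops;
 *      inode->i_fop            = &vbsf_reg_fops;
 *      inode->i_mapping->a_ops = &vbsf_reg_aops;
 *
 * (the exact assignments live in the inode setup code, not here).
 */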
2200
2201
2202#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
2203
2204/**
2205 * Used to read the content of a page into the page cache.
2206 *
2207 * Needed for mmap and reads+writes when the file is mmapped in a
2208 * shared+writeable fashion.
2209 */
2210static int vbsf_readpage(struct file *file, struct page *page)
2211{
2212 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
2213 int err;
2214
2215 SFLOGFLOW(("vbsf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT));
2216 Assert(PageLocked(page));
2217
2218 if (PageUptodate(page)) {
2219 unlock_page(page);
2220 return 0;
2221 }
2222
2223 if (!is_bad_inode(inode)) {
2224 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
2225 if (pReq) {
2226 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2227 struct vbsf_reg_info *sf_r = file->private_data;
2228 uint32_t cbRead;
2229 int vrc;
2230
2231 pReq->PgLst.offFirstPage = 0;
2232 pReq->PgLst.aPages[0] = page_to_phys(page);
2233 vrc = VbglR0SfHostReqReadPgLst(sf_g->map.root,
2234 pReq,
2235 sf_r->Handle.hHost,
2236 (uint64_t)page->index << PAGE_SHIFT,
2237 PAGE_SIZE,
2238 1 /*cPages*/);
2239
2240 cbRead = pReq->Parms.cb32Read.u.value32;
2241 AssertStmt(cbRead <= PAGE_SIZE, cbRead = PAGE_SIZE);
2242 VbglR0PhysHeapFree(pReq);
2243
2244 if (RT_SUCCESS(vrc)) {
2245 if (cbRead == PAGE_SIZE) {
2246 /* likely */
2247 } else {
2248 uint8_t *pbMapped = (uint8_t *)kmap(page);
2249 RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead);
2250 kunmap(page);
2251 /** @todo truncate the inode file size? */
2252 }
2253
2254 flush_dcache_page(page);
2255 SetPageUptodate(page);
2256 unlock_page(page);
2257 return 0;
2258 }
2259 err = -RTErrConvertToErrno(vrc);
2260 } else
2261 err = -ENOMEM;
2262 } else
2263 err = -EIO;
2264 SetPageError(page);
2265 unlock_page(page);
2266 return err;
2267}
2268
2269
2270/**
2271 * Used to write out the content of a dirty page cache page to the host file.
2272 *
2273 * Needed for mmap and writes when the file is mmapped in a shared+writeable
2274 * fashion.
2275 */
2276static int vbsf_writepage(struct page *page, struct writeback_control *wbc)
2277{
2278 struct address_space *mapping = page->mapping;
2279 struct inode *inode = mapping->host;
2280 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
2281 struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, VBSF_HANDLE_F_APPEND);
2282 int err;
2283
2284 SFLOGFLOW(("vbsf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n",
2285               inode, page, (uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle ? pHandle->hHost : 0));
2286
2287 if (pHandle) {
2288 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2289 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
2290 if (pReq) {
2291 uint64_t const cbFile = i_size_read(inode);
2292 uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT;
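                        /* Write the whole page unless it is the one containing EOF, in which
                           case only the valid tail (cbFile modulo the page size) is sent. */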
2293 uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? PAGE_SIZE
2294 : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK;
2295 int vrc;
2296
2297 pReq->PgLst.offFirstPage = 0;
2298 pReq->PgLst.aPages[0] = page_to_phys(page);
2299 vrc = VbglR0SfHostReqWritePgLst(sf_g->map.root,
2300 pReq,
2301 pHandle->hHost,
2302 offInFile,
2303 cbToWrite,
2304 1 /*cPages*/);
2305 AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */
2306                                      ("%#x vs %#x\n", pReq->Parms.cb32Write.u.value32, cbToWrite),
2307 vrc = VERR_WRITE_ERROR);
2308 VbglR0PhysHeapFree(pReq);
2309
2310 if (RT_SUCCESS(vrc)) {
2311 /* Update the inode if we've extended the file. */
2312 /** @todo is this necessary given the cbToWrite calc above? */
2313 uint64_t const offEndOfWrite = offInFile + cbToWrite;
2314 if ( offEndOfWrite > cbFile
2315 && offEndOfWrite > i_size_read(inode))
2316 i_size_write(inode, offEndOfWrite);
2317
2318 if (PageError(page))
2319 ClearPageError(page);
2320
2321 err = 0;
2322 } else {
2323 ClearPageUptodate(page);
2324 err = -EPROTO;
2325 }
2326 } else
2327 err = -ENOMEM;
2328 vbsf_handle_release(pHandle, sf_g, "vbsf_writepage");
2329 } else {
2330 static uint64_t volatile s_cCalls = 0;
2331 if (s_cCalls++ < 16)
2332 printk("vbsf_writepage: no writable handle for %s..\n", sf_i->path->String.ach);
2333 err = -EPROTO;
2334 }
2335 unlock_page(page);
2336 return err;
2337}
2338
2339# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
2340/**
2341 * Called when writing through the page cache (which we shouldn't be doing).
2342 */
2343int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
2344 unsigned len, unsigned flags, struct page **pagep, void **fsdata)
2345{
2346 /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use
2347 * the page cache for any writes AFAIK. We could just as well use
2348 * simple_write_begin & simple_write_end here if we think we really
2349 * need to have non-NULL function pointers in the table... */
2350 static uint64_t volatile s_cCalls = 0;
2351 if (s_cCalls++ < 16) {
2352 printk("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
2353 (unsigned long long)pos, len, flags);
2354 RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
2355 (unsigned long long)pos, len, flags);
2356# ifdef WARN_ON
2357 WARN_ON(1);
2358# endif
2359 }
2360 return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
2361}
2362# endif /* KERNEL_VERSION >= 2.6.24 */
2363
2364# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2365/**
2366 * This is needed to make open accept O_DIRECT as well as dealing with direct
2367 * I/O requests if we don't intercept them earlier.
2368 */
2369# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
2370static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
2371# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
2372static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
2373# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2374static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
2375# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 6)
2376static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2377# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 55)
2378static int vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2379# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 41)
2380static int vbsf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2381# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 35)
2382static int vbsf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2383# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 26)
2384static int vbsf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count)
2385# else
2386static int vbsf_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
2387# endif
2388{
2389 TRACE();
2390 return -EINVAL;
2391}
2392# endif
2393
2394/**
2395 * Address space (for the page cache) operations for regular files.
2396 */
2397struct address_space_operations vbsf_reg_aops = {
2398 .readpage = vbsf_readpage,
2399 .writepage = vbsf_writepage,
2400 /** @todo Need .writepages if we want msync performance... */
2401# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
2402 .set_page_dirty = __set_page_dirty_buffers,
2403# endif
2404# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
2405 .write_begin = vbsf_write_begin,
2406 .write_end = simple_write_end,
2407# else
2408 .prepare_write = simple_prepare_write,
2409 .commit_write = simple_commit_write,
2410# endif
2411# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2412 .direct_IO = vbsf_direct_IO,
2413# endif
2414};
2415
2416#endif /* LINUX_VERSION_CODE >= 2.6.0 */
2417