upload.service.ts 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. import { Injectable, Logger, BadRequestException } from '@nestjs/common';
  2. import { KnowledgeBaseService } from '../knowledge-base/knowledge-base.service';
  3. import { KnowledgeGroupService } from '../knowledge-group/knowledge-group.service';
  4. import * as fs from 'fs';
  5. import * as path from 'path';
  /**
   * Handles files received through the upload endpoint and bulk imports of
   * server-local folders into the knowledge base.
   */
  @Injectable()
  export class UploadService {
    /** Lower-case extensions (without the dot) picked up by a local folder import. */
    private static readonly SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([
      'pdf',
      'doc',
      'docx',
      'xls',
      'xlsx',
      'ppt',
      'pptx',
      'rtf',
      'csv',
      'txt',
      'md',
      'html',
      'json',
      'xml',
      'js',
      'ts',
      'py',
      'java',
      'sql',
    ]);

    /**
     * MIME type for each extension in SUPPORTED_EXTENSIONS (keys include the
     * leading dot, as returned by `path.extname`). Keep both tables in sync.
     */
    private static readonly MIME_TYPES: Readonly<Record<string, string>> = {
      '.pdf': 'application/pdf',
      '.doc': 'application/msword',
      '.docx':
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
      '.xls': 'application/vnd.ms-excel',
      '.xlsx':
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
      '.ppt': 'application/vnd.ms-powerpoint',
      '.pptx':
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
      '.rtf': 'application/rtf',
      '.csv': 'text/csv',
      '.txt': 'text/plain',
      '.md': 'text/markdown',
      '.html': 'text/html',
      '.json': 'application/json',
      '.xml': 'application/xml',
      '.js': 'text/javascript',
      '.sql': 'application/sql',
      // No registered media type exists for these; they are plain-text sources.
      '.ts': 'text/plain',
      '.py': 'text/plain',
      '.java': 'text/plain',
    };

    private readonly logger = new Logger(UploadService.name);

    constructor(
      private kbService: KnowledgeBaseService,
      private groupService: KnowledgeGroupService,
    ) {}

    /**
     * Returns the basic descriptor of a file that Multer has already stored.
     *
     * Extension point for further processing (persisting metadata, text
     * extraction, indexing, validation); currently it only echoes file info.
     *
     * @param file The Multer file object for the upload.
     * @returns The stored name, original name, size, MIME type and path.
     */
    async processUploadedFile(file: Express.Multer.File) {
      return {
        filename: file.filename,
        originalname: file.originalname,
        size: file.size,
        mimetype: file.mimetype,
        path: file.path, // Multer puts the full path of the saved file here
      };
    }

    /**
     * Validates a server-local directory and starts importing it in the
     * background.
     *
     * @param sourcePath Directory on the server to import.
     * @param userId Owner of the created groups and documents.
     * @param tenantId Tenant the import belongs to.
     * @param config Import options; `groupIds[0]` (if any) becomes the root
     *   group and `useHierarchy` mirrors sub-folders as nested groups. The
     *   object is also forwarded to the knowledge-base service. May be omitted.
     * @returns `{ sourcePath, status: 'PROCESSING' }` once the import is started.
     * @throws BadRequestException If the path is missing or is not a directory.
     */
    async importLocalFolder(
      sourcePath: string,
      userId: string,
      tenantId: string,
      config: any,
    ) {
      // A single stat avoids the exists-then-stat race of two separate checks.
      let stat: fs.Stats;
      try {
        stat = await fs.promises.stat(sourcePath);
      } catch {
        throw new BadRequestException(`Directory not found: ${sourcePath}`);
      }
      if (!stat.isDirectory()) {
        throw new BadRequestException(`Path is not a directory: ${sourcePath}`);
      }

      // Tolerate a missing config instead of failing with a TypeError.
      const options = config || {};

      // Determine root group for hierarchy or single group
      const rootGroupId: string | null =
        options.groupIds && options.groupIds.length > 0
          ? options.groupIds[0]
          : null;

      this.logger.log(
        `Starting local folder import: ${sourcePath} for user ${userId}, tenant ${tenantId}`,
      );

      // Fire-and-forget: failures are logged, never surfaced to the request.
      void this.executeLocalImport(
        sourcePath,
        userId,
        tenantId,
        options,
        rootGroupId,
      ).catch((err) => {
        this.logger.error(`Local folder import failed for ${sourcePath}`, err);
      });

      return {
        sourcePath,
        status: 'PROCESSING',
      };
    }

    /**
     * Scans `sourcePath`, copies every supported file into the tenant's upload
     * directory and hands it to the knowledge-base service for indexing.
     *
     * A failure on one file is logged and does not stop the remaining files.
     *
     * @param rootGroupId Existing group to import into; when null a new group
     *   named after the source folder is created.
     */
    private async executeLocalImport(
      sourcePath: string,
      userId: string,
      tenantId: string,
      config: any,
      rootGroupId: string | null,
    ) {
      const options = config || {};

      // Async scan: the first await happens immediately, so the caller is not
      // held up by the directory walk.
      const files = await this.scanDir(sourcePath);
      this.logger.log(`Found ${files.length} files in ${sourcePath}`);

      // Create a root group based on folder name if none provided
      const rootId: string = rootGroupId
        ? rootGroupId
        : (
            await this.groupService.create(userId, tenantId, {
              name: path.basename(sourcePath),
              description: `Imported from local path: ${sourcePath}`,
            })
          ).id;

      // Cache of relative directory -> group id; '.' is the import root.
      const dirToGroupId = new Map<string, string>();
      dirToGroupId.set('.', rootId);

      const uploadBaseDir = process.env.UPLOAD_FILE_PATH || './uploads';
      const tenantDir = path.join(uploadBaseDir, tenantId);
      if (files.length > 0) {
        // Loop-invariant: ensure the tenant directory exists once, up front.
        await fs.promises.mkdir(tenantDir, { recursive: true });
      }

      for (const filePath of files) {
        try {
          const relativeDir =
            path.relative(sourcePath, path.dirname(filePath)) || '.';

          const targetGroupId = options.useHierarchy
            ? await this.ensureHierarchy(
                userId,
                tenantId,
                relativeDir,
                dirToGroupId,
                rootId,
              )
            : rootId;

          const filename = path.basename(filePath);
          const uniqueSuffix = Date.now() + '-' + Math.round(Math.random() * 1e9);
          const storedFilename = `local-${uniqueSuffix}-${filename}`;
          const targetPath = path.join(tenantDir, storedFilename);

          await fs.promises.copyFile(filePath, targetPath);
          const stats = await fs.promises.stat(targetPath);

          const fileInfo = {
            filename: storedFilename,
            originalname: filename,
            path: targetPath,
            size: stats.size,
            mimetype: this.getMimeType(filename),
          };

          await this.kbService.createAndIndex(fileInfo, userId, tenantId, {
            ...options,
            groupIds: [targetGroupId],
          });
        } catch (err) {
          this.logger.error(`Failed to process local file: ${filePath}`, err);
        }
      }

      this.logger.log(`Local folder import completed: ${sourcePath}`);
    }

    /**
     * Resolves the group for `relativeDir`, creating one nested group per path
     * segment below the root group as needed.
     *
     * @param relativeDir Directory relative to the import root ('.' for the root).
     * @param dirToGroupId Cache of already resolved directories; updated in place.
     * @param rootGroupId Group that corresponds to the import root.
     * @returns The id of the group matching `relativeDir`.
     */
    private async ensureHierarchy(
      userId: string,
      tenantId: string,
      relativeDir: string,
      dirToGroupId: Map<string, string>,
      rootGroupId: string,
    ): Promise<string> {
      const cached = dirToGroupId.get(relativeDir);
      if (cached !== undefined) {
        return cached;
      }

      let currentPath = '';
      let parentId = rootGroupId;

      for (const segment of relativeDir.split(path.sep)) {
        if (!segment || segment === '.') continue;

        currentPath = currentPath ? path.join(currentPath, segment) : segment;

        const known = dirToGroupId.get(currentPath);
        if (known !== undefined) {
          parentId = known;
          continue;
        }

        const group = await this.groupService.findOrCreate(
          userId,
          tenantId,
          segment,
          parentId,
          `Sub-folder from local import: ${currentPath}`,
        );
        dirToGroupId.set(currentPath, group.id);
        parentId = group.id;
      }

      return parentId;
    }

    /**
     * Recursively lists the supported regular files below `directory`.
     *
     * Unreadable entries (dangling symlinks, permission errors) are logged and
     * skipped, and symlink cycles are not followed.
     *
     * @returns Full paths in directory-traversal order.
     */
    private async scanDir(directory: string): Promise<string[]> {
      const found: string[] = [];
      await this.walkDir(directory, new Set<string>(), found);
      return found;
    }

    /**
     * Depth-first worker for scanDir.
     *
     * @param ancestors Real paths of the directories currently being walked;
     *   used to detect symlink cycles.
     * @param found Accumulator that receives matching file paths.
     */
    private async walkDir(
      directory: string,
      ancestors: Set<string>,
      found: string[],
    ): Promise<void> {
      let realDir: string;
      let names: string[];
      try {
        realDir = await fs.promises.realpath(directory);
        if (ancestors.has(realDir)) return; // symlink points back up the tree
        names = await fs.promises.readdir(directory);
      } catch (err) {
        this.logger.warn(`Skipping unreadable directory: ${directory}`, err);
        return;
      }

      ancestors.add(realDir);
      try {
        for (const name of names) {
          const fullPath = path.join(directory, name);

          let stat: fs.Stats;
          try {
            // stat (not lstat) so symlinked files and folders are still imported.
            stat = await fs.promises.stat(fullPath);
          } catch (err) {
            this.logger.warn(`Skipping unreadable entry: ${fullPath}`, err);
            continue;
          }

          if (stat.isDirectory()) {
            await this.walkDir(fullPath, ancestors, found);
            continue;
          }

          // Only include regular files with a supported document/code extension
          const ext = path.extname(name).toLowerCase().slice(1);
          if (stat.isFile() && UploadService.SUPPORTED_EXTENSIONS.has(ext)) {
            found.push(fullPath);
          }
        }
      } finally {
        ancestors.delete(realDir);
      }
    }

    /**
     * Maps a file name to a MIME type by its (case-insensitive) extension.
     *
     * @returns The mapped type, or 'application/octet-stream' when unknown.
     */
    private getMimeType(filename: string): string {
      const ext = path.extname(filename).toLowerCase();
      return UploadService.MIME_TYPES[ext] || 'application/octet-stream';
    }
  }