Skip to content

Commit 520854f

Browse files
committed
Merge pull request #28 from ContentMine/dev
Update to 0.2.26
2 parents 01957ab + e475526 commit 520854f

447 files changed

Lines changed: 1232767 additions & 683 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
target/
99
docs/
1010
src/test/resources/org/xmlcml/norma/pubstyle/getpapers/anopheles/
11+
/target/

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
language: java

JATS-archivearticle1.dtd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<!-- dummy -->

docs/TUTORIAL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ This means
8282

8383
This should create the 11 XML files in `target/plos10/`.
8484

85-
Then we convert them. Now the topr directory is `target/plos10/`. It's not a CM directory but it has many child CM directories and converts each. We use the same command as before:
85+
Then we convert them. Now the top directory is `target/plos10/`. It's not a CM directory but it has many child CM directories and converts each. We use the same command as before:
8686
```
8787
norma -q target/plos10/ -i fulltext.xml -o scholarly.html --xsl nlm2html
8888
```

loose.dtd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<!-- dummy DTD file -->

pom.xml

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
<name>norma</name>
1919
<description>A Java library for processing multiple legacy formats into normalized HTML5</description>
2020

21+
<properties>
22+
<opennlp.version>1.6.0</opennlp.version>
23+
</properties>
2124
<licenses>
2225
<license>
2326
<name>Apache License, Version 2.0</name>
@@ -53,6 +56,7 @@
5356
<plugin>
5457
<groupId>org.codehaus.mojo</groupId>
5558
<artifactId>cobertura-maven-plugin</artifactId>
59+
<version>2.7</version>
5660
<configuration>
5761
<check>
5862
<haltOnFailure>false</haltOnFailure>
@@ -92,6 +96,7 @@
9296
<plugin>
9397
<groupId>com.mycila.maven-license-plugin</groupId>
9498
<artifactId>maven-license-plugin</artifactId>
99+
<version>1.10.b1</version>
95100
<configuration>
96101
<header>src/main/resources/header.txt</header>
97102
</configuration>
@@ -185,7 +190,25 @@
185190
</dataSet>
186191
</configuration>
187192
</plugin>
188-
193+
<plugin>
194+
<groupId>org.apache.maven.plugins</groupId>
195+
<artifactId>maven-compiler-plugin</artifactId>
196+
<version>3.5</version>
197+
<configuration>
198+
<source>1.5</source>
199+
<target>1.5</target>
200+
</configuration>
201+
</plugin>
202+
<plugin>
203+
<groupId>org.apache.maven.plugins</groupId>
204+
<artifactId>maven-surefire-plugin</artifactId>
205+
<version>2.12.4</version>
206+
<configuration>
207+
<forkCount>3</forkCount>
208+
<reuseForks>true</reuseForks>
209+
<argLine>-Xmx1024m -XX:MaxPermSize=256m</argLine>
210+
</configuration>
211+
</plugin>
189212
</plugins>
190213
</build>
191214

@@ -214,6 +237,12 @@
214237
<artifactId>svg2xml</artifactId>
215238
<version>0.1-SNAPSHOT</version>
216239
</dependency>
240+
<dependency>
241+
<groupId>org.apache.opennlp</groupId>
242+
<artifactId>opennlp-tools</artifactId>
243+
<version>${opennlp.version}</version>
244+
</dependency>
245+
217246
<!-- https://github.com/jayway/JsonPath/blob/master/README.md -->
218247
<dependency>
219248
<groupId>com.jayway.jsonpath</groupId>

src/main/java/org/xmlcml/norma/NormaArgProcessor.java

Lines changed: 54 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import org.xmlcml.cmine.args.StringPair;
2020
import org.xmlcml.cmine.args.ValueElement;
2121
import org.xmlcml.cmine.args.VersionManager;
22-
import org.xmlcml.cmine.files.CMDir;
22+
import org.xmlcml.cmine.files.CTree;
2323
import org.xmlcml.html.HtmlElement;
2424
import org.xmlcml.norma.image.ocr.NamedImage;
2525
import org.xmlcml.norma.input.html.HtmlCleaner;
@@ -46,7 +46,7 @@ public class NormaArgProcessor extends DefaultArgProcessor {
4646

4747
public final static String HELP_NORMA = "Norma help";
4848

49-
private static String RESOURCE_NAME_TOP = "/org/xmlcml/norma";
49+
public static String RESOURCE_NAME_TOP = "/org/xmlcml/norma";
5050
private static String ARGS_RESOURCE = RESOURCE_NAME_TOP+"/"+"args.xml";
5151
private static final VersionManager NORMA_VERSION_MANAGER = new VersionManager();
5252

@@ -193,12 +193,16 @@ public void transform(ArgumentOption option) {
193193
}
194194

195195
public void runTransform(ArgumentOption option) {
196+
boolean ok = false;
196197
if (currentCTree == null) {
197-
LOG.warn("No current CMDir");
198+
LOG.warn("No current CTree");
198199
} else {
199200
LOG.trace("***run transform "+currentCTree);
200201
getOrCreateNormaTransformer();
201-
normaTransformer.transform(option);
202+
ok = normaTransformer.transform(option);
203+
if (!ok) {
204+
currentCTree = null;
205+
}
202206
}
203207
}
204208

@@ -266,93 +270,96 @@ void writeImages() {
266270
// ==========================
267271

268272

269-
public File checkAndGetInputFile(CMDir cmDir) {
270-
if (cmDir == null) {
271-
throw new RuntimeException("null cmDir");
273+
public File checkAndGetInputFile(CTree cTree) {
274+
if (cTree == null) {
275+
throw new RuntimeException("null cTree");
272276
}
273277
String inputName = getString();
274278
if (inputName == null) {
275279
throw new RuntimeException("Must have single input option");
276280
}
277-
if (!CMDir.isReservedFilename(inputName) && !CMDir.hasReservedParentDirectory(inputName) ) {
281+
if (!CTree.isReservedFilename(inputName) && !CTree.hasReservedParentDirectory(inputName) ) {
278282
throw new RuntimeException("Input must be reserved file; found: "+inputName);
279283
}
280-
File inputFile = cmDir.getExistingReservedFile(inputName);
284+
File inputFile = cTree.getExistingReservedFile(inputName);
281285
if (inputFile == null) {
282-
inputFile = cmDir.getExistingFileWithReservedParentDirectory(inputName);
286+
inputFile = cTree.getExistingFileWithReservedParentDirectory(inputName);
283287
}
284288
if (inputFile == null) {
285-
throw new RuntimeException("Could not find input file "+inputName+" in directory "+cmDir.getDirectory());
289+
String msg = "Could not find input file "+inputName+" in directory "+cTree.getDirectory();
290+
TREE_LOG().error(msg);
291+
System.err.print("!");
292+
// throw new RuntimeException(msg);
286293
}
287294
return inputFile;
288295
}
289296

290-
private void createCMDirListFromInputList() {
297+
private void createCTreeListFromInputList() {
291298
// proceed unless there is a single reserved file for input
292-
if (CMDir.isNonEmptyNonReservedInputList(inputList)) {
293-
LOG.trace("CREATING CMDir FROM INPUT:"+inputList);
299+
if (CTree.isNonEmptyNonReservedInputList(inputList)) {
300+
LOG.trace("CREATING CTree FROM INPUT:"+inputList);
294301
// this actually creates directory
295302
getOrCreateOutputDirectory();
296303
ensureCTreeList();
297-
createNewCMDirsAndCopyOriginalFilesAndAddToList();
304+
createNewCTreesAndCopyOriginalFilesAndAddToList();
298305
}
299306
}
300307

301-
private void createNewCMDirsAndCopyOriginalFilesAndAddToList() {
308+
private void createNewCTreesAndCopyOriginalFilesAndAddToList() {
302309
ensureCTreeList();
303310
for (String filename : inputList) {
304311
try {
305-
CMDir cmDir = createCMDirAndCopyFileOrMakeSubDirectory(filename);
306-
if (cmDir != null) {
307-
cTreeList.add(cmDir);
312+
CTree cTree = createCTreeAndCopyFileOrMakeSubDirectory(filename);
313+
if (cTree != null) {
314+
cTreeList.add(cTree);
308315
}
309316
} catch (IOException e) {
310-
LOG.error("Failed to create CMDir: "+filename+"; "+e);
317+
LOG.error("Failed to create CTree: "+filename+"; "+e);
311318
}
312319
}
313320
}
314321

315-
private CMDir createCMDirAndCopyFileOrMakeSubDirectory(String filename) throws IOException {
316-
CMDir cmDir = null;
322+
private CTree createCTreeAndCopyFileOrMakeSubDirectory(String filename) throws IOException {
323+
CTree cTree = null;
317324
File file = new File(filename);
318325
if (file.isDirectory()) {
319-
LOG.error("should not have any directories in inputList: "+file);
326+
this.PROJECT_LOG().error("should not have any directories in inputList: "+file);
320327
} else {
321328
if (output != null) {
322329
String name = FilenameUtils.getName(filename);
323-
if (CMDir.isReservedFilename(name)) {
324-
LOG.error(name+" is reserved for CMDir: (check that inputs are not already in a CMDir) "+file.getAbsolutePath());
330+
if (CTree.isReservedFilename(name)) {
331+
this.PROJECT_LOG().info(name+" is reserved for CTree: (check that inputs are not already in a CTree) "+file.getAbsolutePath());
325332
}
326-
String cmFilename = CMDir.getCMDirReservedFilenameForExtension(name);
333+
String cmFilename = CTree.getCTreeReservedFilenameForExtension(name);
327334
if (cmFilename == null) {
328-
LOG.error("Cannot create CMDir from this type of file: "+name);
335+
this.PROJECT_LOG().error("Cannot create CTree from this type of file: "+name);
329336
return null;
330337
}
331338
LOG.trace("Reserved filename: "+cmFilename);
332-
if (CMDir.isReservedDirectory(cmFilename)) {
333-
cmDir = makeCMDir(name);
334-
ensureReservedDirectoryAndCopyFile(cmDir, cmFilename, filename);
339+
if (CTree.isReservedDirectory(cmFilename)) {
340+
cTree = makeCTree(name);
341+
ensureReservedDirectoryAndCopyFile(cTree, cmFilename, filename);
335342
} else {
336-
cmDir = makeCMDir(name);
337-
File destFile = cmDir.getReservedFile(cmFilename);
343+
cTree = makeCTree(name);
344+
File destFile = cTree.getReservedFile(cmFilename);
338345
if (destFile != null) {
339346
FileUtils.copyFile(file, destFile);
340347
}
341348
}
342349
}
343350
}
344-
return cmDir;
351+
return cTree;
345352
}
346353

347-
private CMDir makeCMDir(String name) {
348-
CMDir cmDir;
354+
private CTree makeCTree(String name) {
355+
CTree cTree;
349356
String dirName = FilenameUtils.removeExtension(name);
350-
cmDir = createCMDir(dirName);
351-
return cmDir;
357+
cTree = createCTree(dirName);
358+
return cTree;
352359
}
353360

354-
private void ensureReservedDirectoryAndCopyFile(CMDir cmDir, String reservedFilename, String filename) {
355-
File reservedDir = new File(cmDir.getDirectory(), reservedFilename);
361+
private void ensureReservedDirectoryAndCopyFile(CTree cTree, String reservedFilename, String filename) {
362+
File reservedDir = new File(cTree.getDirectory(), reservedFilename);
356363
LOG.trace("Res "+reservedDir.getAbsolutePath());
357364
File orig = new File(filename);
358365
LOG.trace("Orig: "+orig.getAbsolutePath());
@@ -372,19 +379,19 @@ private void ensureReservedDirectoryAndCopyFile(CMDir cmDir, String reservedFile
372379

373380
}
374381

375-
private CMDir createCMDir(String dirName) {
376-
File cmDirFile = new File(output, dirName);
377-
CMDir cmDir = new CMDir(cmDirFile);
378-
cmDir.createDirectory(cmDirFile, false);
379-
return cmDir;
382+
private CTree createCTree(String dirName) {
383+
File cTreeFile = new File(output, dirName);
384+
CTree cTree = new CTree(cTreeFile);
385+
cTree.createDirectory(cTreeFile, false);
386+
return cTree;
380387
}
381388

382389
private void getOrCreateOutputDirectory() {
383390
if (output != null) {
384391
File outputDir = new File(output);
385392
if (outputDir.exists()) {
386393
if (!outputDir.isDirectory()) {
387-
throw new RuntimeException("cmDirRoot "+outputDir+" must be a directory");
394+
throw new RuntimeException("cTreeRoot "+outputDir+" must be a directory");
388395
}
389396
} else {
390397
outputDir.mkdirs();
@@ -439,10 +446,10 @@ public List<SectionTagger> getSectionTaggers() {
439446
*/
440447
public void parseArgs(String[] args) {
441448
super.parseArgs(args);
442-
createCMDirListFromInputList();
449+
createCTreeListFromInputList();
443450
}
444451

445-
public CMDir getCurrentCMDir() {
452+
public CTree getCurrentCMTree() {
446453
return currentCTree;
447454
}
448455

0 commit comments

Comments
 (0)